An Incucyte was used to image plates of cells transfected with various mCherry- or mScarlet-encoding constructs. Even-numbered columns had dox-induced TDP-43 knockdown.
Quantification was performed using CellProfiler.
Additionally, for some experiments Nanopore was performed, which is also quantified.
generate_full_image <- function(combined_positions, all_images, n_rows,
                                n_columns=6, dim_image=200, spacing_images=5,
                                spacing_wells=30, background_quantile=0.97){
  # Build one inverted montage image from a set of Incucyte image files.
  #
  # Arguments:
  #   combined_positions  data frame with columns `well`, `Plate` and `Order`
  #                       (`Order` = montage row for that well; NA = skip).
  #                       If it also has `replicate` and `filename` columns,
  #                       the replicate number is looked up there instead of
  #                       being parsed from the file name.
  #   all_images          character vector of image paths. The file name must
  #                       be "<prefix>_<well>_<replicate>..." and paths that
  #                       contain "control plate" are treated as plate 2.
  #   n_rows, n_columns   number of wells per montage column / row.
  #   dim_image           side length (pixels) each image is resized to.
  #   spacing_images      gap between the images inside one well.
  #   spacing_wells       gap between neighbouring wells.
  #   background_quantile quantile of the background-subtracted montage that
  #                       is used as the normalisation value before inversion.
  #
  # Returns an EBImage `Image` (1 - normalised, transposed montage).
  #
  # Each well is drawn as a 2x2 grid, so this only works with four images per
  # well ("replicates" 1-4): 1 = top-left, 2 = top-right, 3 = bottom-left,
  # 4 = bottom-right.
  well_size <- 2*dim_image + spacing_images
  well_pitch <- well_size + spacing_wells
  height <- n_rows*well_size + (n_rows-1)*spacing_wells + 1
  width <- n_columns*well_size + (n_columns-1)*spacing_wells + 1
  full_image <- matrix(0, nrow = height, ncol = width)
  background <- 10000000 # set arbitrarily large initial value
  for(image in all_images){
    filename <- word(image, sep="/", start=-1, end = -1)
    plate <- if (str_detect(image, "control plate")) 2 else 1
    well <- word(filename, sep="_", start=2, end=2)
    plate_column <- as.numeric(str_sub(well, 2, 3))
    # Wrap plate columns onto the montage (e.g. 7-12 onto 1-6 when
    # n_columns = 6); previously the 6 was hard-coded.
    image_column <- ((plate_column - 1) %% n_columns) + 1
    image_row <- unique(combined_positions$Order[which(combined_positions$well == well &
                                                         combined_positions$Plate == plate)])
    # Wells that are absent from combined_positions (length 0) or that have
    # no Order are not drawn.
    if(length(image_row) == 0){
      next
    }
    if(length(image_row) > 1){
      stop("Ambiguous Order for well ", well, " on plate ", plate,
           " in combined_positions", call. = FALSE)
    }
    if(is.na(image_row)){
      next
    }
    if("replicate" %in% colnames(combined_positions)){
      replicate <- combined_positions$replicate[which(combined_positions$filename == filename)]
    } else {
      replicate <- as.numeric(word(filename, sep="_", start=3, end=3))
    }
    image_matrix <- readImage(image)
    smaller <- as.array(EBImage::resize(image_matrix, w=dim_image, h=dim_image))
    # Top-left corner of the well, then offset within the 2x2 grid
    well_top_left_x <- (image_column - 1)*well_pitch + 1
    image_top_left_x <- well_top_left_x + ifelse(replicate %in% c(2,4), dim_image + spacing_images, 0)
    well_top_left_y <- (image_row - 1)*well_pitch + 1
    image_top_left_y <- well_top_left_y + ifelse(replicate %in% c(3,4), dim_image + spacing_images, 0)
    full_image[image_top_left_y:(image_top_left_y+dim_image-1),
               image_top_left_x:(image_top_left_x + dim_image-1)] <- smaller
    # Background = the smallest 5% quantile seen across all drawn images
    this_background <- quantile(smaller, 0.05)
    if(this_background < background){
      background <- this_background
    }
  }
  full_image2 <- full_image - background
  img_invert <- Image(1 - t(full_image2/quantile(full_image2, background_quantile)))
  return(img_invert)
}
# Red channel images from 96 well plate - internal cryptics
images_96 <- Sys.glob(paste0(data_dir, "oscar 12_05_2022/96 well red/*.png"))
# Red channel images from second 96 well plate - AARS1 upstream and controls
images_24 <- Sys.glob(paste0(data_dir, "oscar 12_05_2022/control plate red/*.png"))
all_images <- c(images_96, images_24)
all_files <- unique(word(all_images, sep="/", start=-1, end = -1))
position_df <- data.frame(filename = all_files) %>%
mutate(well = word(filename, sep="_", 2, 2)) %>%
mutate(Plate = ifelse(str_detect(filename, "96"), 1, 2)) %>%
mutate(col = as.numeric(str_sub(well, 2, 3))) %>%
mutate(Position = paste0(str_sub(well, 1, 1), ifelse(col <= 6, "1-6", "7-12")))
positions <- read_csv("small_data_files/Plate positions for 12_05_2022 incucyte.csv") %>%
mutate(row = 1:n())
## Rows: 20 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): Construct, Position, Type
## dbl (2): Plate, Order
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
combined_positions <- position_df %>%
left_join(positions, by = c("Position", "Plate"))
img <- generate_full_image(combined_positions, all_images, n_rows = 14)
display(img)
writeImage(img, "markdown_images/mScar/mScarlet_inverted_ordered.jpeg", quality=90)
combined_positions_mcherry <- combined_positions %>%
select(-Order) %>%
mutate(Order = case_when(Construct == "R3" ~ 1,
Construct == "mCherry" ~ 2,
Construct == "untransfected" ~ 3))
img <- generate_full_image(combined_positions_mcherry, all_images, n_rows = 3)
display(img)
writeImage(img, "markdown_images/mCherry/mCherry_inverted_ordered.jpeg", quality=90)
rm(img)
# Read in the data from each object identitifed by cell profiler
objects_df <- read_csv(paste0(data_dir, "oscar 12_05_2022/cell_profiler_output/incucyte_12_05_2022_lower_thresholdIdentifyPrimaryObjects.csv"))
## Rows: 118819 Columns: 23
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): FileName_incucyte_12_05_2022, PathName_incucyte_12_05_2022
## dbl (21): ImageNumber, ObjectNumber, Intensity_IntegratedIntensityEdge_incuc...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Summarise to find the total integrated intensity of each image
integrated_df <- objects_df %>%
group_by(FileName_incucyte_12_05_2022) %>%
mutate(integrated = sum(Intensity_IntegratedIntensity_incucyte_12_05_2022)) %>%
select(FileName_incucyte_12_05_2022, integrated) %>%
unique() %>%
mutate(well = word(FileName_incucyte_12_05_2022, 2, sep="_")) %>%
mutate(row = str_sub(well, 1, 1),
column = as.numeric(str_sub(well, 2, 3))) %>%
mutate(Plate = ifelse(str_detect(FileName_incucyte_12_05_2022, "96"), 1, 2)) %>%
mutate(Position = ifelse(column <= 6, paste0(row, "1-6"), paste0(row, "7-12"))) %>%
left_join(positions, by = c("Position", "Plate")) %>%
mutate(Treatment = ifelse(column %% 2 == 0, "shTDP", "NT")) %>%
group_by(well, Plate) %>%
mutate(mean_red = mean(integrated),
sd_red = sd(integrated))
# Now calculate summaries for each condition
integrated_summary <- integrated_df %>%
ungroup() %>%
group_by(Construct, Treatment) %>%
mutate(mean_condition = ifelse(mean(mean_red) < 0, 0, mean(mean_red)),
sd_condition = sd(mean_red)) %>%
select(Construct, mean_condition, sd_condition, Treatment, Order) %>%
unique() %>%
group_by(Construct) %>%
mutate(mean_NT = max(ifelse(Treatment == "NT", mean_condition, -1000000000000))) %>%
mutate(mean_dox = max(ifelse(Treatment == "shTDP", mean_condition, -100000000000))) %>%
mutate(log2FC = log2(mean_dox/mean_NT)) %>%
mutate(diff_frac = (mean_dox-mean_NT)/mean_dox)
# Produce a plot specifically for the mScarlet ones
good_names_df <- data.frame(Order = c(1:14),
good_name = factor(c(1:13, "+ve"), levels = c(1:13, "+ve")))
integrated_mscar <- integrated_df %>%
filter(!is.na(Order)) %>%
left_join(good_names_df)
## Joining with `by = join_by(Order)`
integrated_summary_mscar <- integrated_summary %>%
filter(!is.na(Order)) %>%
left_join(good_names_df) %>%
mutate(y_pos = 2.4*max(mean_condition+sd_condition))
## Joining with `by = join_by(Order)`
ggplot(integrated_mscar, aes(x = factor(good_name), y=mean_red, colour= Treatment, fill = Treatment)) +
scale_y_log10() +
geom_bar(data = integrated_summary_mscar, aes(x = factor(good_name), y = mean_condition),
position = position_dodge(width = 0.8),
width = 0.8,
stat="identity", alpha = 1) +
geom_point(alpha = 0.2, position = position_dodge(width = 0.8), colour = "grey50") +
geom_errorbar(aes(x = good_name, ymax = (mean_red + sd_red), ymin = (mean_red-sd_red)),
position = position_dodge(width = 0.8), width = 0.2, colour = "grey50") +
geom_text(data = integrated_summary_mscar, aes(x = factor(good_name), y = y_pos,
label = round(log2FC,1)), colour="grey20", size=3.5) +
ggeasy::easy_rotate_x_labels() +
ylab("Red fluorescence intensity") +
xlab("") +
ggpubr::theme_pubclean() +
ggeasy::easy_remove_legend() +
ggsci::scale_fill_npg() +
ggsci::scale_colour_npg()
## Registered S3 methods overwritten by 'car':
## method from
## hist.boot FSA
## confint.boot FSA
## Warning in self$trans$transform(x): NaNs produced
## Warning: Transformation introduced infinite values in continuous y-axis
ggsave("markdown_images/mScar/mScar intensities quantified and ordered.pdf", height = 8, width = 12, units="cm")
## Warning in self$trans$transform(x): NaNs produced
## Warning in self$trans$transform(x): Transformation introduced infinite values
## in continuous y-axis
We also transfected our constructs into SH-SY5Y cells to check that the effect wasn’t specific to SK-N-BE(2) cells.
objects_df <- read_csv(paste0(data_dir, "SHSY5Y incucyte/output_lower_threshold/IdentifyPrimaryObjects.csv"))
## Rows: 237882 Columns: 23
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): FileName_incucyte_12_05_2022, PathName_incucyte_12_05_2022
## dbl (21): ImageNumber, ObjectNumber, Intensity_IntegratedIntensityEdge_incuc...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
all_image_files <- word(Sys.glob(paste0(data_dir, "SHSY5Y incucyte/as stored/orange/*.png")), -1, sep="/")
positions <- read_csv("small_data_files/Plate positions for 12_05_2022 incucyte.csv")
## Rows: 20 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): Construct, Position, Type
## dbl (2): Plate, Order
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
sh_positions = data.frame(row = c("A", "B", "C", "D", "E", "F", "G", "H"),
Construct = c("mScarlet", "A10", "A11", "B5", "B11", "B12", "C2", "C3")) %>%
left_join(positions) %>%
mutate(plasmid = factor(case_when(Construct == "mScarlet" ~ "+ve",
T~paste0(Order)),
levels = c("+ve", "6", "7", "8", "9", "10", "11", "12")))
## Joining with `by = join_by(Construct)`
# Summarise to find the total integrated intensity of each image
integrated_df <- objects_df %>%
group_by(FileName_incucyte_12_05_2022) %>%
mutate(integrated = sum(Intensity_IntegratedIntensity_incucyte_12_05_2022)) %>%
select(FileName_incucyte_12_05_2022, integrated) %>%
unique() %>%
full_join(data.frame(FileName_incucyte_12_05_2022 = all_image_files))
## Joining with `by = join_by(FileName_incucyte_12_05_2022)`
integrated_df[is.na(integrated_df)] <- 0
integrated_df2 <- integrated_df %>%
mutate(well = word(FileName_incucyte_12_05_2022, 2, sep="_")) %>%
mutate(row = str_sub(well, 1, 1),
column = as.numeric(str_sub(well, 2, 3))) %>%
filter(column < 7) %>%
#left_join(positions, by = c("Position", "Plate")) %>%
mutate(Treatment = ifelse(column %% 2 == 0, "shTDP", "NT")) %>%
group_by(well) %>%
mutate(mean_red = mean(integrated),
sd_red = sd(integrated)) %>%
left_join(sh_positions)
## Joining with `by = join_by(row)`
ggplot(integrated_df2, aes(x = column, y = row, fill = mean_red)) +
geom_tile()
summary_df <- integrated_df2 %>%
ungroup() %>%
select(row, mean_red, sd_red, Treatment) %>%
unique() %>%
group_by(row, Treatment) %>%
mutate(mean_condition = mean(mean_red),
sd_condition = sd(mean_red)) %>%
select(row, Treatment, mean_condition, sd_condition) %>%
unique() %>%
left_join(sh_positions) %>%
mutate()
## Joining with `by = join_by(row)`
ggplot(integrated_df2, aes(x = plasmid, y=mean_red, colour= Treatment, fill = Treatment)) +
geom_bar(data = summary_df, aes(x = plasmid, y = mean_condition),
position = position_dodge(width = 0.8),
width = 0.8,
stat="identity", alpha = 1) +
geom_point(alpha = 0.2, position = position_dodge(width = 0.8), colour = "grey50") +
geom_errorbar(aes(x = plasmid, ymax = (mean_red + sd_red), ymin = (mean_red-sd_red)),
position = position_dodge(width = 0.8), width = 0.2, colour = "grey50") +
ggeasy::easy_rotate_x_labels() +
ylab("Red fluorescence intensity") +
xlab("mScarlet Construct #") +
ggpubr::theme_pubclean() +
ggsci::scale_fill_npg() +
ggsci::scale_color_npg() +
ggeasy::easy_move_legend("right")
ggsave("markdown_images/SHSY5Y mScarlet/quantification of SHSY5Y mScarlet.pdf", height = 8, width = 12, units = "cm")
sk_sh <- integrated_summary_mscar %>%
ungroup() %>%
select(Order, sk = mean_dox) %>%
unique() %>%
inner_join(summary_df %>% filter(Treatment == "shTDP") %>% select(Order, sh = mean_condition)) %>%
filter(Order != 14) # remove positive control
## Adding missing grouping variables: `row`, `Treatment`
## Joining with `by = join_by(Order)`
# Scatter of per-construct shTDP fluorescence in SK-N-BE(2) versus SH-SY5Y
# cells (log-log), with a linear fit and the Spearman correlation.
# The correlation label is added once with annotate(); a geom_text() layer
# with a constant label would draw one overlapping copy per data row.
sk_sh_spearman <- round(cor(sk_sh$sk, sk_sh$sh, method = "spearman"), 2)
ggplot(sk_sh, aes(x = sk, y = sh)) +
  scale_x_log10() +
  scale_y_log10() +
  geom_smooth(method = "lm", se = FALSE) +
  geom_point() +
  xlab("Fluorescence in SK-N-BE(2) shTDP") +
  ylab("Fluorescence in SH-SY5Y shTDP") +
  ggpubr::theme_classic2() +
  annotate("text", x = 1300, y = 30, label = paste0("r = ", sk_sh_spearman)) +
  geom_label_repel(aes(label = paste0(Order)), alpha = 0.8)
## `geom_smooth()` using formula = 'y ~ x'
ggsave("markdown_images/SHSY5Y mScarlet/SHSY5Y versus SKNDZ.pdf", height = 8, width = 8, units = "cm")
## `geom_smooth()` using formula = 'y ~ x'
# Display names for the three mCherry-plate constructs, in plotting order
good_names_mCherry <- c("Cryptic\nmCherry", "Constitutive\nmCherry", "-ve")
good_names_mCherry_df <- data.frame(good_name = factor(good_names_mCherry, levels = good_names_mCherry),
Construct = c("R3", "mCherry", "untransfected"))
# Per-image table for the mCherry constructs (used for the points and
# per-well error bars in the plot below).
# NOTE(review): integrated_df2 at this point holds the SH-SY5Y data, whose
# Construct values come from sh_positions (mScarlet, A10, A11, B5, B11, B12,
# C2, C3). None of them match R3 / mCherry / untransfected, so good_name is NA
# for every row and the final filter leaves zero rows. This was presumably
# meant to use the per-image table from the first (12_05_2022) experiment,
# which has been overwritten by now - confirm and re-derive it if so.
integrated_mCherry <- integrated_df2 %>%
left_join(good_names_mCherry_df) %>%
select(-Order) %>%
mutate(Order = case_when(Construct == "R3" ~ 1,
Construct == "mCherry" ~ 2,
Construct == "untransfected" ~ 3)) %>%
filter(!is.na(good_name))
## Joining with `by = join_by(Construct)`
integrated_summary_mCherry <- integrated_summary %>%
left_join(good_names_mCherry_df) %>%
select(-Order) %>%
mutate(Order = case_when(Construct == "R3" ~ 1,
Construct == "mCherry" ~ 2,
Construct == "untransfected" ~ 3)) %>%
filter(!is.na(good_name)) %>%
mutate(y_pos = 2.4*max(mean_condition+sd_condition))
## Joining with `by = join_by(Construct)`
ggplot(integrated_mCherry, aes(x = factor(good_name), y=mean_red, colour= Treatment, fill = Treatment)) +
scale_y_log10() +
geom_bar(data = integrated_summary_mCherry, aes(x = factor(good_name), y = mean_condition),
position = position_dodge(width = 0.8),
width = 0.8,
stat="identity", alpha = 1) +
geom_point(alpha = 0.2, position = position_dodge(width = 0.8), colour = "grey50") +
geom_errorbar(aes(x = good_name, ymax = (mean_red + sd_red), ymin = (mean_red-sd_red)),
position = position_dodge(width = 0.8), width = 0.2, colour = "grey50") +
geom_text(data = integrated_summary_mCherry, aes(x = factor(good_name), y = y_pos,
label = round(log2FC,1)), colour="grey20", size=3.5) +
ggeasy::easy_rotate_x_labels() +
ylab("Red fluorescence intensity") +
xlab("") +
ggpubr::theme_pubclean() +
ggeasy::easy_remove_legend() +
ggsci::scale_fill_npg() +
ggsci::scale_color_npg()
ggsave("markdown_images/mCherry/mCherry intensities quantified.pdf", height = 7, width = 8, units="cm")
rm(list=setdiff(ls(), "data_dir"))
We swapped out the AARS1 cryptic exon for a library of potential CEs, each encoding the same part of Cas9.
Note that “UPI” stands for “Unique plasmid identifier” i.e. it’s a barcode
This code relies on the output of the python scripts “extract_umis_and_Seqs.py” and “splice_ai_of_cas.py” which are run using a snakemake pipeline.
min_reads <- 7
dox <- read_csv(paste0(data_dir, "Cas9_mCherry/sequencing_analysis/csv/C9D.csv.gz")) %>%
mutate(treatment = "shTDP") %>%
left_join(read_csv(paste0(data_dir, "Cas9_mCherry/sequencing_analysis/csv/C9D_consensus.csv")))
## Rows: 137952 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): upi, umi, CE_seq
## dbl (5): intron1_IR, intron2_IR, both_IR, no_CE, with_CE
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 25300 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): upi, seq
## dbl (1): n
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Joining with `by = join_by(upi)`
nt <- read_csv(paste0(data_dir, "Cas9_mCherry/sequencing_analysis/csv/C9N.csv.gz")) %>%
mutate(treatment = "NT") %>%
left_join(read_csv(paste0(data_dir, "Cas9_mCherry/sequencing_analysis/csv/C9N_consensus.csv")))
## Rows: 123047 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): upi, umi, CE_seq
## dbl (5): intron1_IR, intron2_IR, both_IR, no_CE, with_CE
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 24378 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): upi, seq
## dbl (1): n
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Joining with `by = join_by(upi)`
pred <- read_csv(paste0(data_dir, "Cas9_mCherry/sequencing_analysis/spliceai/spliceai_results.csv"))
## Rows: 2637 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): upi
## dbl (2): acc, don
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#pred <- read_csv(paste0(data_dir, "Cas9_mCherry/sequencing_analysis/spliceai/yo.csv"))
# Combine data from each and calculate the fraction of reads that are cryptic etc
# Filter just for plasmids which were detected in both
combined <- bind_rows(dox, nt) %>%
# remove PCR duplicates
distinct(treatment, upi, umi, .keep_all = T) %>%
# get summaries
group_by(treatment, upi) %>%
# analyse fractions
mutate(total = sum(with_CE + no_CE + intron1_IR + intron2_IR + both_IR)) %>%
mutate(frac = sum(with_CE)/total) %>%
select(upi, treatment, frac, total, seq, total) %>%
unique() %>%
ungroup() %>%
filter(!is.na(frac))
# Filter for those that we have sequence data for and pivot wider
pivoted <- combined %>%
ungroup() %>%
dplyr::select(upi, treatment, frac, seq, total) %>%
pivot_wider(names_from = treatment, values_from = c("frac", "seq", "total")) %>%
ungroup() %>%
mutate(seq2 = case_when(is.na(seq_shTDP) & is.na(seq_NT) ~ "unclear",
is.na(seq_shTDP) ~ seq_NT,
is.na(seq_NT) ~ seq_shTDP,
seq_shTDP != seq_NT ~ "different",
seq_shTDP == seq_NT ~ seq_NT)) %>%
mutate(seq2 = ifelse(seq2 == "unclear", NA, seq2))
pivoted_just_good <- pivoted %>%
select(-seq_shTDP, -seq_NT) %>%
left_join(pred) %>%
filter(!str_detect(seq2, "N"),
str_length(seq2) == 154) %>%
filter(!is.na(acc)) %>%
mutate(score = acc+don)
## Joining with `by = join_by(upi)`
# Hexbin plots of cryptic exon inclusion versus combined SpliceAI score, one
# per treatment, restricted to plasmids with more than min_reads reads.
# The Spearman correlation is written once with annotate(); a geom_text()
# layer with a constant label would draw one copy per plasmid on top of each
# other.
nt_spliceai_df <- pivoted_just_good %>%
  filter(total_NT > min_reads)
nt_spliceai_rho <- round(cor(nt_spliceai_df$score, nt_spliceai_df$frac_NT,
                             method = "spearman"), 2)
plot_nt_spliceai <- ggplot(nt_spliceai_df,
                           aes(x = 100*frac_NT, y = score, colour = "grey20")) +
  geom_hex(bins = 20) +
  scale_fill_gradient2(low = "grey100", high = "orange", mid = "grey100") +
  theme_classic() +
  scale_color_identity() +
  annotate("text", x = 90, y = 0.1, colour = "grey20",
           label = paste0("rho = ", nt_spliceai_rho)) +
  xlab("% Inclusion") +
  ylab("Combined SpliceAI Score")
dox_spliceai_df <- pivoted_just_good %>%
  filter(total_shTDP > min_reads)
dox_spliceai_rho <- round(cor(dox_spliceai_df$score, dox_spliceai_df$frac_shTDP,
                              method = "spearman"), 2)
plot_dox_spliceai <- ggplot(dox_spliceai_df,
                            aes(x = 100*frac_shTDP, y = score, colour = "grey20")) +
  geom_hex(bins = 20) +
  scale_fill_gradient2(low = "grey100", high = "orange", mid = "grey100") +
  theme_classic() +
  scale_color_identity() +
  annotate("text", x = 90, y = 0.1, colour = "grey20",
           label = paste0("rho = ", dox_spliceai_rho)) +
  xlab("% Inclusion") +
  ylab("Combined SpliceAI Score")
#ggsave("markdown_images/mCherry/splice_correlations.pdf", height = 7, width = 17, units="cm")
# Keep only plasmids with more than min_reads reads in BOTH treatments
pivoted_high_both <- pivoted_just_good %>%
filter(total_shTDP > min_reads & total_NT > min_reads)
# Hexbin of cryptic exon inclusion (PSI, %) without vs with TDP-43 knockdown
plot_psi_comparison <- ggplot(pivoted_high_both, aes(x = 100*frac_NT, y = 100*frac_shTDP, colour = "grey20")) +
geom_hex(bins=20) +
scale_fill_gradient2(low="grey100", high="orange", mid = "grey100") +
theme_classic() +
scale_color_identity() +
ylab("CE PSI shTDP") +
xlab("CE PSI NT")
# Arrange the panels in a 2x2 grid (the `|` and `/` operators on plots are
# presumably patchwork's - confirm the package is loaded).
# NOTE(review): plot_psi_comparison appears twice in the top row; given the
# "all_three" file name this looks like a placeholder to keep panel sizes
# equal - confirm the duplicate is intended.
(plot_psi_comparison | plot_psi_comparison) / (plot_nt_spliceai | plot_dox_spliceai)
ggsave("markdown_images/mCherry/all_three_cas9_plots.pdf", height=14, width=18, units="cm")
objects_df <- read_csv(paste0(data_dir, "Cas9_mCherry/cell_profiler_output/cas9_design2_mChIdentifyPrimaryObjects.csv"))
## Rows: 42388 Columns: 23
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): FileName_incucyte_12_05_2022, PathName_incucyte_12_05_2022
## dbl (21): ImageNumber, ObjectNumber, Intensity_IntegratedIntensityEdge_incuc...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
plasmids = c("+ve", "A11", "Cas9-mCh", "-ve")
position_df <- data.frame(row = c("A", 'B', 'C', 'D'),
plasmid = factor(plasmids, levels = c("+ve", "-ve", "Cas9-mCh", "A11")))
# Summarise to find the total integrated intensity of each image
integrated_df <- objects_df %>%
group_by(FileName_incucyte_12_05_2022) %>%
mutate(integrated = sum(Intensity_IntegratedIntensity_incucyte_12_05_2022)) %>%
select(FileName_incucyte_12_05_2022, integrated) %>%
unique() %>%
mutate(well = word(FileName_incucyte_12_05_2022, 2, sep="_")) %>%
mutate(row = str_sub(well, 1, 1),
column = as.numeric(str_sub(well, 2, 3))) %>%
mutate(Treatment = ifelse(column %% 2 == 0, "shTDP", "NT")) %>%
left_join(position_df) %>%
group_by(plasmid, Treatment) %>%
mutate(average_intensity = mean(integrated),
sd_intensity = sd(integrated))
## Joining with `by = join_by(row)`
summary_df <- integrated_df %>%
select(average_intensity, sd_intensity) %>%
unique()
## Adding missing grouping variables: `plasmid`, `Treatment`
log2fc_df <- summary_df %>%
ungroup() %>%
group_by(plasmid) %>%
pivot_wider(names_from = Treatment, values_from = c("average_intensity", "sd_intensity")) %>%
mutate(log2fc = log2(average_intensity_shTDP/average_intensity_NT))
log2fc_summary_df <- summary_df %>%
left_join(log2fc_df %>% select(plasmid, log2fc)) %>%
ungroup() %>%
group_by(plasmid) %>%
mutate(ymax = max(average_intensity+sd_intensity/sqrt(3))) %>%
ungroup() %>%
select(plasmid, ymax, log2fc) %>%
unique()
## Joining with `by = join_by(plasmid)`
# Bar plot of mean integrated intensity per plasmid and treatment.
# Error bars are grouped by Treatment and share the bars' dodge width so that
# each error bar sits on its own bar. (`fill` is not an aesthetic of
# geom_errorbar, so mapping it there was ignored and the bars were not
# dodged by treatment.)
# NOTE(review): the subtitle says N=4 but the SEM divides by sqrt(3) - confirm
# which is correct.
ggplot(summary_df %>% filter(plasmid != "A11"),
       aes(x = plasmid, y = average_intensity)) +
  geom_bar(stat = "identity", position = position_dodge(width = 0.9),
           aes(fill = Treatment)) +
  geom_errorbar(aes(ymin = average_intensity-sd_intensity/sqrt(3),
                    ymax = average_intensity+sd_intensity/sqrt(3),
                    group = Treatment),
                position = position_dodge(width = 0.9),
                width = 0.25) +
  ylab("Integrated Intensity (arbitrary units)") +
  xlab("") +
  ggtitle("Cas9 Design 2",
          "N=4, error bar=SEM, log2fc shown") +
  theme_classic() +
  ggeasy::easy_remove_legend_title() +
  ggsci::scale_fill_npg() #+
## Warning in geom_errorbar(aes(ymin = average_intensity - sd_intensity/sqrt(3), :
## Ignoring unknown aesthetics: fill
# geom_text(data = log2fc_summary_df %>%
# filter(plasmid!="A11") , aes(x = plasmid, y = ymax+3, label = signif(log2fc, 2)))
ggsave("markdown_images/mCherry/Cas9_mCherry_quantification.pdf", height = 8, width = 10, units= "cm")
rm(list=setdiff(ls(), "data_dir"))
To help demonstrate that the expression of mScarlet above is specifically due to TDP-43 knockdown, we tried a similar experiment to before, except that either a functional or a non-functional (2FL mutant) TDP-43/Raver1 fusion protein, which is known to rescue TDP-43 loss of splicing function, was co-transfected.
mini_spacing = 5
big_spacing = 15
dim_image = 200
norm_quantile <- 0.995
background_quantile <- 0.02
# Find all relevant image files
image_files <- Sys.glob(paste0(data_dir, "raver rescue/orange/*.png"))
# Assign the layout
image_df <- data.frame(filename = word(image_files, -1, sep="/"),
full_dir = image_files) %>%
mutate(well = word(filename, 2, sep="_")) %>%
mutate(row = str_sub(well,1,1)) %>%
mutate(column = as.numeric(str_sub(well, 2, -1)),
image_number = as.numeric(word(filename, 3, sep="_"))) %>%
mutate(big_row = ifelse(column <= 6, 1, 2),
small_row = ifelse(image_number > 2, 2, 1),
big_column = ifelse(column <= 6, column, column - 6),
small_column = ifelse(image_number > 2, image_number - 2, image_number)) %>%
mutate(n_mini_spaces_x = big_column - 1 + small_column - 1,
n_big_spaces_x = big_column - 1,
n_mini_spaces_y = big_row - 1 + small_row - 1,
n_big_spaces_y = big_row - 1,
n_previous_images_x = 2*(big_column-1) + small_column - 1,
n_previous_images_y = 2*(big_row-1) + small_row - 1) %>%
mutate(start_x = 1+n_big_spaces_x*big_spacing + n_mini_spaces_x*mini_spacing + n_previous_images_x*dim_image) %>%
mutate(start_y = 1+n_big_spaces_y*big_spacing + n_mini_spaces_y*mini_spacing + n_previous_images_y*dim_image)
# Find common normalisation factor
for(filename in image_df$full_dir){
if(filename == image_df$full_dir[1]){
all_vals <- sample(as.vector(readImage(filename)), 100)
} else {
all_vals <- c(all_vals, sample(as.vector(readImage(filename)), 100))
}
}
normalisation <- quantile(all_vals, norm_quantile)
# Find background, using only Row A as these were untransfected.
# 100 random pixels are drawn from each Row A image and the
# `background_quantile` quantile of the pooled sample is used.
background_df <- image_df %>% filter(row == "A")
all_vals2 <- unlist(lapply(background_df$full_dir, function(background_file){
  sample(as.vector(readImage(background_file)), 100)
}))
# Use the Row A sample (all_vals2), not the all-wells sample (all_vals) that
# was drawn for the normalisation factor.
background <- quantile(all_vals2, background_quantile)
for(this_row in unique(image_df$row)){
row_df <- image_df %>%
filter(row == this_row)
height = max(row_df$start_y) + dim_image
width = max(row_df$start_x) + dim_image
full_image <- matrix(nrow = height, ncol = width, 0)
for(i in 1:nrow(row_df)){
this_image <- readImage(row_df$full_dir[i])
smaller <- as.array(EBImage::resize(this_image, w=dim_image, h=dim_image))
start_x = row_df$start_x[i]
start_y = row_df$start_y[i]
full_image[start_y:(start_y+dim_image-1), start_x:(start_x+dim_image-1)] <- smaller
}
img <- Image(1-t((full_image-background)/normalisation))
writeImage(img, paste0("markdown_images/individual_raver_rescue/Row", this_row, ".jpeg") , quality=90)
}
calculate_errors_y_over_x <- function(x_mean, x_sem, y_mean, y_sem, quant, n = 10000, log2fc = FALSE){
  # Monte Carlo estimate of a quantile of the ratio y/x (or of log2(y/x)).
  #
  # Draws `n` values each for x and y from normal distributions with the
  # given means and standard errors, forms the ratios, and returns the
  # `quant` quantile(s) of them (as returned by quantile()).
  #
  # With log2fc = TRUE, draws whose ratio is negative or undefined have no
  # log2 and are left out before the quantile is taken. They are removed
  # before calling log2() so that no "NaNs produced" warning is raised.
  x_vals <- rnorm(n = n, mean = x_mean, sd = x_sem)
  y_vals <- rnorm(n = n, mean = y_mean, sd = y_sem)
  ratios <- y_vals/x_vals
  if(log2fc){
    has_log <- !is.na(ratios) & ratios >= 0
    ratios <- log2(ratios[has_log])
  }
  return(quantile(ratios, quant))
}
objects_df <- read_csv(paste0(data_dir, "raver rescue/cell_profiler_output/incuyte rescue analysisIdentifyPrimaryObjects.csv"))
## Rows: 169619 Columns: 23
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): FileName_incucyte_12_05_2022, PathName_incucyte_12_05_2022
## dbl (21): ImageNumber, ObjectNumber, Intensity_IntegratedIntensityEdge_incuc...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
plasmids <- c("mScarlet", "A10", "A11", "B5", "B11", "B12", "C2")
plasmid_df <- data.frame(row = c('B', 'C', 'D', 'E', 'F', 'G', 'H'),
plasmid = factor(plasmids, levels = plasmids)) %>%
mutate(TDP_REG = ifelse(plasmid == "mScarlet", F, T))
positions <- read_csv("small_data_files/Plate positions for 12_05_2022 incucyte.csv")
## Rows: 20 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): Construct, Position, Type
## dbl (2): Plate, Order
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
integrated_df <- objects_df %>%
group_by(FileName_incucyte_12_05_2022) %>%
mutate(integrated = sum(Intensity_IntegratedIntensity_incucyte_12_05_2022)) %>%
select(FileName_incucyte_12_05_2022, integrated) %>%
unique() %>%
mutate(well = word(FileName_incucyte_12_05_2022, 2, sep="_")) %>%
mutate(row = str_sub(well, 1, 1),
column = as.numeric(str_sub(well, 2, 3))) %>%
mutate(dox = ifelse(column %% 2 == 0, "shTDP", "NT"),
construct = ifelse(column <= 6, "Raver", "2FL")) %>%
ungroup() %>%
group_by(dox, construct, row, column) %>%
mutate(average_intensity_well = mean(integrated)) %>%
ungroup() %>%
group_by(dox, construct, row) %>%
select(average_intensity_well) %>%
unique() %>%
mutate(average_intensity = mean(average_intensity_well),
sd_intensity = sd(average_intensity_well))
## Adding missing grouping variables: `dox`, `construct`, `row`
summary_df <- integrated_df %>%
select(average_intensity, sd_intensity) %>%
unique() %>%
filter(row != "A")
## Adding missing grouping variables: `dox`, `construct`, `row`
# Number of wells behind each mean/sd, needed to turn the SD into a SEM.
# integrated_df has one row per well within each dox/construct/row group.
n_wells_df <- integrated_df %>%
  ungroup() %>%
  count(dox, construct, row, name = "n_wells")
# One row per plate row and dox condition, with Raver and 2FL side by side,
# plus the log2 fold change of 2FL over Raver.
log2fc_df <- summary_df %>%
  ungroup() %>%
  left_join(n_wells_df, by = c("dox", "construct", "row")) %>%
  mutate(sem_intensity = sd_intensity/sqrt(n_wells)) %>%
  select(-n_wells) %>%
  group_by(dox) %>%
  pivot_wider(names_from = construct,
              values_from = c("average_intensity", "sd_intensity", "sem_intensity")) %>%
  mutate(log2fc = log2(average_intensity_2FL/average_intensity_Raver)) %>%
  left_join(plasmid_df) %>%
  left_join(positions %>% dplyr::rename(plasmid = Construct)) %>%
  mutate(good_name = factor(ifelse(plasmid == "mScarlet", "+ve", Order),
                            levels = c("+ve", "6", "7", "9",
                                       "10", "11", "12")))
## Joining with `by = join_by(row)`
## Joining with `by = join_by(plasmid)`
# 5% and 95% Monte Carlo quantiles of the log2 fold change.
# calculate_errors_y_over_x() expects standard errors of the mean, i.e.
# sd/sqrt(n); sqrt(sd) was being passed before.
log2fc_df$log2fc_5pc <- mapply(calculate_errors_y_over_x,
                               log2fc_df$average_intensity_Raver,
                               log2fc_df$sem_intensity_Raver,
                               log2fc_df$average_intensity_2FL,
                               log2fc_df$sem_intensity_2FL,
                               0.05,
                               log2fc = TRUE)
## Warning in (function (x_mean, x_sem, y_mean, y_sem, quant, n = 10000, log2fc =
## F) : NaNs produced
## Warning in (function (x_mean, x_sem, y_mean, y_sem, quant, n = 10000, log2fc =
## F) : NaNs produced
## Warning in (function (x_mean, x_sem, y_mean, y_sem, quant, n = 10000, log2fc =
## F) : NaNs produced
log2fc_df$log2fc_95pc <- mapply(calculate_errors_y_over_x,
                                log2fc_df$average_intensity_Raver,
                                log2fc_df$sem_intensity_Raver,
                                log2fc_df$average_intensity_2FL,
                                log2fc_df$sem_intensity_2FL,
                                0.95,
                                log2fc = TRUE)
## Warning in (function (x_mean, x_sem, y_mean, y_sem, quant, n = 10000, log2fc =
## F) : NaNs produced
## Warning in (function (x_mean, x_sem, y_mean, y_sem, quant, n = 10000, log2fc =
## F) : NaNs produced
## Warning in (function (x_mean, x_sem, y_mean, y_sem, quant, n = 10000, log2fc =
## F) : NaNs produced
ggplot(log2fc_df %>% filter(dox == "shTDP"),
aes(x = good_name, y = 100*(2^log2fc)-100, fill = TDP_REG)) +
geom_bar(stat="identity") +
ggtitle("Increase in intensity when using\nmutant TDP/Raver1 instead of WT") +
ylab("% increase with 2FL") +
geom_errorbar(aes(ymin = 100*(2^log2fc_5pc)-100, ymax = 100*(2^log2fc_95pc)-100,
width=0.5)) +
scale_fill_viridis_d() +
xlab("mScarlet construct") +
ggpubr::theme_pubclean() +
ggeasy::easy_remove_legend()
ggsave("markdown_images/mScar/raver_rescue_quantification.pdf", height = 7, width = 10, units="cm")
rm(list=setdiff(ls(), "data_dir"))
We did lots of Nanopore sequencing. We define a term: “productively spliced”. This refers to the % of transcripts whose splicing produces a mature mRNA with the full, uninterrupted CDS for the functional protein of interest, and which are not predicted to be NMD-sensitive.
Nanopore analysis here relies primarily on CSV files generated by the python function “extract_splice_junctions_from_bam.py”
Additionally, some plots rely on “pileups” which were generated using a custom pysam-based script, “perform_pileups.py”.
# Read in splice junction CSVs
all_junction_counts <- Sys.glob(paste0(data_dir,
"Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/*csv.gz"))
# Read every junction-count CSV, tag it with the forward/reverse barcode
# parsed from its file name ("<bc_f>_<bc_r>.csv.gz"), and stack the results.
# Tables are collected in a pre-allocated list and bound once at the end
# rather than growing a data frame inside the loop. Files with no rows are
# skipped (their list slot stays NULL, which bind_rows() ignores).
csv_list <- vector("list", length(all_junction_counts))
for(i in seq_along(all_junction_counts)){
  f <- all_junction_counts[i]
  print(f)
  file_stem <- word(f, sep="/", -1, -1)
  bc_f <- word(file_stem, sep="_", 1, 1)
  bc_r <- word(word(file_stem, sep="_", 2, 2), sep="\\.", 1)
  this_csv <- read_csv(f, col_types = cols(.default = "?", mapping_quality = "d")) %>%
    mutate(bc_f = bc_f,
           bc_r = bc_r)
  if(nrow(this_csv) == 0){
    next
  }
  csv_list[[i]] <- this_csv
}
all_csvs <- bind_rows(csv_list)
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f1_r1.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f1_r10.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f1_r11.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f1_r12.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f1_r2.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f1_r3.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f1_r4.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f1_r5.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f1_r6.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f1_r7.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f1_r8.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f1_r9.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f2_r1.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f2_r10.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f2_r11.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f2_r12.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f2_r2.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f2_r3.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f2_r4.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f2_r5.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f2_r6.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f2_r7.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f2_r8.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f2_r9.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f3_r1.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f3_r10.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f3_r11.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f3_r12.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f3_r2.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f3_r3.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f3_r4.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f3_r5.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f3_r6.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f3_r7.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f3_r8.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f3_r9.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f4_r1.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f4_r10.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f4_r11.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f4_r12.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f4_r2.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f4_r3.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f4_r4.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f4_r5.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f4_r6.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f4_r7.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f4_r8.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f4_r9.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f5_r1.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f5_r10.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f5_r11.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f5_r12.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f5_r2.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f5_r3.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f5_r4.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f5_r5.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f5_r6.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f5_r7.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f5_r8.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f5_r9.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f6_r1.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f6_r10.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f6_r11.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f6_r12.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f6_r2.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f6_r3.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f6_r4.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f6_r5.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f6_r6.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f6_r7.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f6_r8.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f6_r9.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f7_r1.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f7_r10.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f7_r11.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f7_r12.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f7_r2.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f7_r3.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f7_r4.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f7_r5.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f7_r6.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f7_r7.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f7_r8.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f7_r9.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f8_r1.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f8_r10.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f8_r11.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f8_r12.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f8_r2.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f8_r3.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f8_r4.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f8_r5.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f8_r6.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f8_r7.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f8_r8.csv.gz"
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions 96 well/junction_counts/f8_r9.csv.gz"
# Filter just for good alignments
# Steps, in order:
#   * treat a missing flag_string as "no flags" and drop records flagged as
#     "not primary" or "supplementary";
#   * keep mapping_quality above 50;
#   * turn reference into a factor with a fixed level order and discard the
#     d1s/d2s/d3s references;
#   * discard files whose barcode label contains "-1" (presumably reads with an
#     unassigned barcode -- confirm against the demultiplexing step);
#   * take the numeric part of each barcode label (characters 2-3);
#   * replace missing junction strings with "".
primary <- all_csvs %>%
  mutate(flag_string = ifelse(is.na(flag_string), "", flag_string)) %>%
  filter(!str_detect(flag_string, "not primary|supplementary"),
         mapping_quality > 50) %>%
  mutate(reference = factor(
    reference,
    levels = c("d1s", "d2s", "d3s", "A10", "A11", "B1", "B2", "B4", "B5",
               "B6", "B11", "B12", "C2", "C3", "E1", "E4")
  )) %>%
  filter(!(reference %in% c("d1s", "d2s", "d3s")),
         !str_detect(bc_f, "-1"),
         !str_detect(bc_r, "-1")) %>%
  mutate(barcode1 = as.numeric(str_sub(bc_f, 2, 3)),
         barcode2 = as.numeric(str_sub(bc_r, 2, 3)),
         junctions = ifelse(is.na(junctions), "", junctions))

# Filter for those that have the expected barcode
# Plate layout table: number the rows of the full sheet first, then keep only
# the entries for plate 1.
plate_layout <- read_csv("small_data_files/Plate positions for 12_05_2022 incucyte.csv")
positions <- plate_layout %>%
  mutate(row = 1:n()) %>%
  filter(Plate == 1)
## Rows: 20 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): Construct, Position, Type
## dbl (2): Plate, Order
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Lookup from forward barcode number (1-8) to plate row letter (A-H).
# LETTERS replaces the hand-rolled str_split(..., simplify = T) call, which
# also relied on the reassignable `T` shorthand for TRUE.
forward_bc_df <- data.frame(barcode1 = 1:8, row = LETTERS[1:8])

# Attach the construct expected in each well:
#   barcode1 -> row letter, barcode2 -> column number, giving the well name;
#   the well's row letter plus its plate half ("1-6" or "7-12") gives the
#   Position key used in the plate layout table.
# Construct names are shortened ("Design 1 stronger" -> "d1s") so they can be
# compared with the alignment reference names, and d1s/d2s/d3s are discarded.
df2 <- primary %>%
  filter(barcode1 > 0 & barcode2 > 0) %>%
  ungroup() %>%
  left_join(forward_bc_df, by = "barcode1") %>%
  mutate(well = paste0(row, barcode2)) %>%
  mutate(Position = paste0(str_sub(well, 1, 1),
                           ifelse(barcode2 <= 6, "1-6", "7-12"))) %>%
  left_join(positions, by = "Position") %>%
  mutate(Construct = str_replace(str_replace(Construct, "Design ", "d"),
                                 " stronger", "s")) %>%
  filter(!(Construct %in% c("d1s", "d2s", "d3s"))) %>%
  ungroup()

# "Accuracy" is the percentage of reads whose aligned reference equals the
# construct expected from the well's barcodes.
n_before_filter <- sum(df2$number_of_reads)
df2 <- df2 %>%
  # reference is a factor while Construct is character; compare as strings
  # explicitly rather than relying on the implicit factor comparison.
  filter(Construct == as.character(reference))
n_after_filter <- sum(df2$number_of_reads)
accuracy <- 100*(n_after_filter/n_before_filter)
print(paste("Accuracy =", accuracy, "%"))
## [1] "Accuracy = 99.3772200152704 %"
######## Analyse splicing #########
# Read the combined GTF. Column names are given explicitly as X1..X9, which is
# what the code below refers to. Passing a character vector is the documented
# way to name columns and makes readr treat the first line as data; the
# previous integer vector `1:9` is not a documented value for col_names.
gtf <- read_tsv(paste0(data_dir, "Nanopore from first mScarlet mCherry 96 plates/combined_gtf.gtf"),
                col_names = paste0("X", 1:9))
## Rows: 63 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (7): X1, X2, X3, X6, X7, X8, X9
## dbl (2): X4, X5
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Calculate what the product splicing junctions should be from the GTF
# Within each construct (GTF column X1), the "productive" and "downstream"
# features (column X9) are ordered by start coordinate (X4). A junction runs
# from the end of one feature (X5 - 1) to the start of the next (X4 - 1).
# Junctions that begin at a "productive" feature are joined with ";" to give
# the expected full junction string; the last of them is kept separately.
gtf_productive <- gtf %>%
  dplyr::rename(Construct = X1) %>%
  group_by(Construct) %>%
  filter(X9 %in% c("productive", "downstream")) %>%
  arrange(X4) %>%
  mutate(junc_start = X5-1,
         junc_end = lead(X4)-1) %>%
  filter(X9 == "productive") %>%
  mutate(this_splice_string = paste0(junc_start, "-", junc_end),
         productive_splice_string = paste0(this_splice_string, collapse = ";"),
         just_downstream_splice_string = last(this_splice_string)) %>%
  select(Construct, productive_splice_string, just_downstream_splice_string) %>%
  distinct()

# Calculate the fraction that is productively spliced for each condition
# A read group counts as productive only when its junction string is exactly
# the expected one. Reads are then totalled per well, and wells in
# even-numbered columns (barcode2) are labelled "Dox", odd-numbered "NT".
df3 <- df2 %>%
  ungroup() %>%
  left_join(select(gtf_productive, Construct, productive_splice_string,
                   just_downstream_splice_string)) %>%
  mutate(downstream_is_spliced = str_detect(junctions, just_downstream_splice_string),
         is_productive = productive_splice_string == junctions) %>%
  group_by(well) %>%
  mutate(n_productive = sum(as.numeric(is_productive)*number_of_reads),
         n_total = sum(number_of_reads),
         fraction_productive = n_productive/n_total) %>%
  ungroup() %>%
  mutate(treatment = factor(ifelse(barcode2 %% 2 == 0, "Dox", "NT"),
                            levels = c("NT", "Dox"))) %>%
  select(well, treatment, Construct, fraction_productive) %>%
  unique()
## Joining with `by = join_by(Construct)`
# Mean and standard deviation of the per-well productive fraction for every
# Construct x treatment combination (one row per combination).
construct_stats <- df3 %>%
  group_by(Construct, treatment) %>%
  mutate(m = mean(fraction_productive),
         sd = sd(fraction_productive)) %>%
  select(Construct, treatment, m, sd) %>%
  unique()

# Attach each construct's Order from the plate layout and derive a display
# label: "+ve" for the mScarlet construct, otherwise the Order value.
construct_order <- positions %>% select(Construct, Order)
df3_summary <- construct_stats %>%
  left_join(construct_order) %>%
  mutate(good_name = factor(ifelse(Construct == "mScarlet", "+ve", Order),
                            levels = c("+ve", "6", "7", "9",
                                       "10", "11", "12")))
## Joining with `by = join_by(Construct)`
# One panel per Order value: mean % productive transcript for NT and Dox,
# with error bars of +/- one standard deviation. The legend is removed and the
# x axis text is blanked, so the two treatments are told apart by fill colour.
ggplot(df3_summary, aes(x = treatment, y = 100*m, fill = treatment)) +
  geom_col() +
  geom_errorbar(aes(ymin = 100*(m-sd), ymax = 100*(m+sd)),
                position = "dodge", width = 0.3) +
  facet_wrap(~Order, scales = "free_y", nrow = 2) +
  ylim(0, NA) +
  labs(x = "", y = "% Productive Transcript") +
  ggpubr::theme_pubr() +
  theme(legend.position = "right",
        plot.title = element_text(size = 6),
        text = element_text(size = 10),
        axis.text = element_text(size = 8),
        axis.text.x = element_blank()) +
  ggeasy::easy_remove_legend() +
  ggsci::scale_fill_npg()
ggsave("markdown_images/mScar/96_well_productive_transcript_plot.pdf",
       height = 5, width = 10, units = "cm")
# Drop every object except data_dir before the next section starts.
rm(list = setdiff(ls(), "data_dir"))
Let’s also make a pileup of an example mScarlet construct to illustrate that the splicing is as designed by SpliceNouveau.
files3 <- Sys.glob(paste0(data_dir,
  "Nanopore from first mScarlet mCherry 96 plates/pileup 96 well/pileup/*.csv.gz"))

# Indices of the two pileup files used for the example trace: the file for
# barcode pair f3_r11 is labelled "NT" and the one for f3_r12 "shTDP".
# NOTE(review): the original comment called these "cols 5 and 6 in the plate
# for the best internal cryptic"; r11/r12 are presumably the 5th and 6th
# columns of the 7-12 half of the plate -- confirm.
nt <- which(str_detect(files3, "f3_r11"))
dox <- which(str_detect(files3, "f3_r12"))
# The loop below compares `i == nt` as a scalar, so each pattern has to match
# exactly one file; fail early with a clear message otherwise.
stopifnot(length(nt) == 1, length(dox) == 1)

coverage_list <- list()
for (i in c(nt, dox)) {
  this_file <- files3[i]
  # Work out the sample label outside the pipeline: the pileup table has its
  # own `nt` column (the base call), which would mask the index `nt` inside
  # dplyr verbs for as long as that column is present.
  sample_label <- if (i == nt) "NT" else "shTDP"

  this_pu <- read_csv(this_file) %>%
    # Ensure that we only look for the correct alignment: keep the reference
    # with the largest summed count in this file.
    group_by(reference_name) %>%
    mutate(n_this_rname = sum(n)) %>%
    ungroup() %>%
    filter(n_this_rname == max(n_this_rname)) %>%
    # Get coverage: keep whole-number positions only (presumably fractional
    # positions encode insertions -- confirm against perform_pileups.py), and
    # count every entry except deletions ("del") towards the depth.
    filter(position %% 1 == 0) %>%
    group_by(position) %>%
    mutate(score = ifelse(nt == "del", 0, n)) %>%
    mutate(n_at_pos = sum(score)) %>%
    # Name the grouping column explicitly instead of relying on select()
    # re-adding it with a message.
    select(position, n_at_pos) %>%
    unique() %>%
    mutate(sample = sample_label) %>%
    ungroup() %>%
    # Depth relative to the deepest position in this sample.
    mutate(frac = n_at_pos / max(n_at_pos))

  coverage_list[[sample_label]] <- this_pu
}
coverage_df <- bind_rows(coverage_list)
## Rows: 57675 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): nt
## dbl (5): reference_name, position, insertion_number, n, total_reads
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Adding missing grouping variables: `position`
## Rows: 53645 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): nt
## dbl (5): reference_name, position, insertion_number, n, total_reads
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Adding missing grouping variables: `position`
# Area trace of coverage (`n_at_pos`) along the construct, one panel per
# sample stacked vertically, each with its own y scale and no axes or labels.
coverage_df %>%
  ungroup() %>%
  ggplot(aes(x = position, y = n_at_pos)) +
  geom_area() +
  facet_wrap(~sample, scales = "free_y", ncol = 1) +
  theme_void()

# No size is given, so ggsave() uses the current graphics device dimensions.
ggsave(filename = "markdown_images/mScar/B11_nano_trace.pdf")
## Saving 7 x 5 in image
Although the mScarlet experiment above and this mCherry experiment were performed in parallel, the Nanopore sequencing was done separately.
# Read in splice junction CSVs
all_csv <- Sys.glob(paste0(
  data_dir,
  "Nanopore from first mScarlet mCherry 96 plates/extracted splice junctions design1 mcherry/*.gz"
))

if (length(all_csv) == 0) {
  stop("No splice junction files found for the design 1 mCherry run.",
       call. = FALSE)
}

# Read one junction-count CSV and return its filtered rows with the columns
# `productive`, `n_productive`, `frac_productive` and `barcode` added.
# `frac_productive` is a per-file value, so it is repeated on every row.
read_mcherry_junctions <- function(filename) {
  # Barcode = last "_"-separated token of the file name before its first ".".
  barcode_id <- as.numeric(
    word(word(basename(filename), 1, 1, sep = "\\."), -1, -1, sep = "_")
  )

  read_csv(filename) %>%
    filter(!str_detect(flag_string, "not primary|supplementary")) %>%
    filter(
      reference == "r3", # this is the code given to cryptic mCherry vector
      str_detect(junctions, "-1564"),
      mapping_quality > 50,
      first_pos < 80,
      last_pos > 1650
    ) %>%
    mutate(
      productive = junctions == "175-339;426-655;1470-1564",
      n_productive = sum(as.numeric(productive) * number_of_reads),
      frac_productive = n_productive / sum(number_of_reads),
      barcode = barcode_id
    )
}

# Read every file first and bind once, rather than growing `all_df` per file.
all_df <- bind_rows(lapply(all_csv, read_mcherry_junctions))
## Rows: 298 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 185 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 247 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 263 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 246 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 263 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 336 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 183 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 515 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 247 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 269 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 168 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Mean and SD of the productive-transcript percentage per construct and
# treatment, followed by a dodged bar plot of the cryptic mCherry construct.
all_df2 <- all_df %>%
  # `all_df` holds one row per junction with the per-sample `frac_productive`
  # repeated on each of them. Reduce to one row per sample (barcode) first so
  # that every sample contributes exactly once to the mean and SD.
  distinct(barcode, frac_productive) %>%
  mutate(
    # Even barcodes are the shTDP samples, odd barcodes the untreated ones.
    treatment = ifelse(barcode %% 2 == 0, "shTDP", "NT"),
    construct = factor(
      ifelse(barcode <= 6, "Cryptic\nmCherry", "Constitutive\nmCherry"),
      levels = c("Cryptic\nmCherry", "Constitutive\nmCherry")
    )
  ) %>%
  group_by(construct, treatment) %>%
  summarise(
    mean_pc = 100 * mean(frac_productive),
    sd_pc = 100 * sd(frac_productive),
    .groups = "drop"
  )

ggplot(all_df2 %>% filter(construct == "Cryptic\nmCherry"),
       aes(x = construct, y = mean_pc, fill = treatment)) +
  geom_bar(position = position_dodge(width = 0.9), stat = "identity",
           width = 0.9) +
  geom_errorbar(aes(ymin = mean_pc - sd_pc, ymax = mean_pc + sd_pc),
                position = position_dodge(width = 0.9),
                colour = "black", width = 0.4) +
  ggpubr::theme_pubclean() +
  ggeasy::easy_add_legend_title("") +
  ylab("% Productive\ntranscript") +
  xlab("") +
  theme(legend.position = "right", text = element_text(size = 9)) +
  ggsci::scale_fill_npg()

ggsave("markdown_images/mCherry/nanopore analysis design 1 mcherry.pdf",
       height = 5, width = 7, units = "cm")

# Clear the workspace for the next section, keeping only `data_dir`.
rm(list = setdiff(ls(), "data_dir"))
# Read the junction counts of the triple cryptic Cre experiment into
# `cryptic_df`, flagging for every junction string which of the three cryptic
# exons it contains.
files <- Sys.glob(paste0(
  data_dir,
  "Nanopore of triple cryptic cre/junction_counts/*.csv.gz"
))
# Drop files whose *name* contains "-1". Testing the base name (not the whole
# path) keeps a "-1" in a parent directory from discarding every file.
files <- files[!str_detect(basename(files), "-1")]

if (length(files) == 0) {
  stop("No junction count files found for the triple cryptic Cre run.",
       call. = FALSE)
}

# Read one junction-count CSV. File names look like "<bc1>_<bc2>.csv.gz";
# both barcodes are attached to every row together with the `dox` flag.
read_cre_junctions <- function(file) {
  print(file)

  sample_id <- word(basename(file), sep = "\\.", 1, 1)
  forward_bc <- word(sample_id, sep = "_", 1, 1)
  reverse_bc <- word(sample_id, sep = "_", 2, 2)
  # Even-numbered reverse barcodes are the dox (TDP-43 knockdown) samples.
  # Parity of the parsed number gives the same answer as matching the digits
  # 2/4/6 for r1..r6, and stays correct for multi-digit barcodes.
  is_dox <- as.numeric(str_extract(reverse_bc, "\\d+")) %% 2 == 0

  read_csv(file) %>%
    filter(reference == "triple_cryptic_cre") %>%
    filter(!str_detect(flag_string, "not primary|supplementary")) %>%
    filter(!is.na(junctions)) %>%
    mutate(
      first_cryptic = str_detect(junctions, "235-399;492-721"),
      second_cryptic = str_detect(junctions, "820-984;1083-1312"),
      third_cryptic = str_detect(junctions, "1589-1753;1862-2091"),
      bc1 = forward_bc,
      bc2 = reverse_bc,
      dox = is_dox
    )
}

# Read every file first and bind once, rather than growing `cryptic_df`.
cryptic_df <- bind_rows(lapply(files, read_cre_junctions))
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore of triple cryptic cre/junction_counts/f10_r1.csv.gz"
## Rows: 346 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore of triple cryptic cre/junction_counts/f10_r2.csv.gz"
## Rows: 552 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore of triple cryptic cre/junction_counts/f10_r3.csv.gz"
## Rows: 138 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore of triple cryptic cre/junction_counts/f10_r4.csv.gz"
## Rows: 558 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore of triple cryptic cre/junction_counts/f10_r5.csv.gz"
## Rows: 201 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore of triple cryptic cre/junction_counts/f10_r6.csv.gz"
## Rows: 350 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore of triple cryptic cre/junction_counts/f11_r1.csv.gz"
## Rows: 45 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore of triple cryptic cre/junction_counts/f11_r2.csv.gz"
## Rows: 75 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore of triple cryptic cre/junction_counts/f11_r3.csv.gz"
## Rows: 23 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore of triple cryptic cre/junction_counts/f11_r4.csv.gz"
## Rows: 114 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore of triple cryptic cre/junction_counts/f11_r5.csv.gz"
## Rows: 45 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Nanopore of triple cryptic cre/junction_counts/f11_r6.csv.gz"
## Rows: 96 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# For the "f10" samples: percentage of reads containing 0, 1, 2 or 3 cryptic
# exons in each sample (`bc2`), then the mean and SD of those percentages
# across samples for each dox condition.
per_sample_pc <- cryptic_df %>%
  filter(bc1 == "f10") %>%
  # Number of cryptic exons present in the junction string of each row.
  mutate(
    ce_n = as.numeric(first_cryptic) +
      as.numeric(second_cryptic) +
      as.numeric(third_cryptic)
  ) %>%
  group_by(bc2, dox, ce_n) %>%
  summarise(n_reads = sum(number_of_reads), .groups = "drop") %>%
  # Percentage of each sample's reads that falls into each exon-count class.
  group_by(bc2) %>%
  mutate(total_this_n = 100 * n_reads / sum(n_reads)) %>%
  ungroup()

df <- per_sample_pc %>%
  group_by(ce_n, dox) %>%
  summarise(
    mean_pc = mean(total_this_n),
    sd_pc = sd(total_this_n),
    .groups = "drop"
  ) %>%
  # sd() is NA when a class was seen in a single sample only; draw no spread.
  mutate(sd_pc = coalesce(sd_pc, 0)) %>%
  # Add a zero-height entry for every exon-count/dox combination that was not
  # observed, so each x position always holds two dodged bars of equal width.
  tidyr::complete(
    ce_n = 0:3,
    dox = c(FALSE, TRUE),
    fill = list(mean_pc = 0, sd_pc = 0)
  ) %>%
  select(dox, ce_n, mean_pc, sd_pc)

# Bars and error bars share one dodge width so the error bars sit centred on
# their bars.
bar_dodge <- position_dodge(width = 0.9)

ggplot(df, aes(x = ce_n, y = mean_pc, fill = dox)) +
  geom_col(position = bar_dodge) +
  geom_errorbar(
    aes(ymin = mean_pc - sd_pc, ymax = mean_pc + sd_pc),
    position = bar_dodge,
    width = 0.4
  ) +
  xlab("Number of cryptic exons included") +
  ylab("%") +
  ggpubr::theme_pubclean() +
  ggeasy::easy_remove_legend() +
  ggsci::scale_fill_npg()

ggsave("markdown_images/Cre/Cre cryptic plot.pdf",
       height = 3.5, width = 8, units = "cm")

# Clear the workspace for the next section, keeping only `data_dir`.
rm(list = setdiff(ls(), "data_dir"))
This used a special version of SpliceNouveau that saved the sequence and score after every successful iteration.
# Width of the position bins used when the track is downsampled for plotting.
downsample_amount <- 10 # need to downsample otherwise difficult to see

# Load the tracked scores and keep only the attempt(s) whose best score equals
# the overall best score, restricted to positions below 1250.
df <- paste0(
  data_dir,
  "mScarlet_evolution/mscar_track_all5.csv.tracked_scores.csv.gz"
) %>%
  read_csv() %>%
  mutate(max_score = max(score), .by = attempt) %>%
  filter(
    max_score == max(max_score),
    position < 1250
  )
## Rows: 393660 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): sequence
## dbl (6): attempt, iteration, position, donor_prob, acceptor_prob, score
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Number the iterations 1, 2, ... in order of first appearance; `y` is the
# row index used for the plots.
y_df <- df %>%
  distinct(iteration) %>%
  mutate(y = row_number())

# Downsample the track: positions are rounded to the nearest multiple of
# `downsample_amount`, and each (bin, iteration) cell keeps the largest donor
# or acceptor probability found in it.
df2 <- df %>%
  left_join(y_df, by = "iteration") %>%
  select(position, donor_prob, acceptor_prob, y, score) %>%
  mutate(
    downsampled = downsample_amount * round(position / downsample_amount),
    # Larger of the two probabilities at each position; the maximum of this
    # per cell equals the maximum over both columns.
    value = pmax(donor_prob, acceptor_prob)
  ) %>%
  group_by(downsampled, y) %>%
  mutate(ds_value = max(value)) %>%
  ungroup() %>%
  select(downsampled, ds_value, y, score) %>%
  distinct()
## Joining with `by = join_by(iteration)`
# Left panel: raster of the downsampled splice-site probabilities, one row per
# iteration (rows are reversed so that y = 1 is drawn at the top). The fill is
# ds_value^0.35.
p1 <- df2 %>%
  ggplot(aes(
    x = downsampled,
    y = fct_rev(ordered(y)),
    fill = (ds_value^0.35)
  )) +
  geom_raster() +
  scale_fill_viridis_c() +
  ggpubr::theme_classic2() +
  labs(x = "Position in construct", y = "") +
  ggeasy::easy_remove_legend() +
  ggeasy::easy_remove_y_axis() +
  scale_x_discrete(position = "top")

# Right panel: one horizontal bar per iteration showing its score relative to
# the lowest score, coloured by the log of that difference.
p2 <- df2 %>%
  select(y, score) %>%
  distinct() %>%
  ggplot(aes(
    x = fct_rev(ordered(y)),
    y = (score - min(score)),
    fill = log(score - min(score))
  )) +
  geom_bar(stat = "identity") +
  coord_flip() +
  scale_fill_viridis_c() +
  ggpubr::theme_classic2() +
  ggeasy::easy_remove_legend() +
  ggeasy::easy_remove_y_axis() +
  ylab("Fitness") +
  scale_y_discrete(position = "right")

# Panels side by side, with the raster four times as wide as the bars.
(p1 | p2) + plot_layout(widths = c(4, 1))

ggsave(
  filename = "markdown_images/mScar/evolution.pdf",
  width = 13,
  height = 10,
  units = "cm"
)
For the supplementary figure we want to plot all five, but for the main figure we plot only the best one.
# Read every Gluc junction-count file into `full_junc_df`, adding the barcode
# numbers parsed from the file name and a few per-row junction descriptors.
files2 <- Sys.glob(paste0(
  data_dir,
  "Gluc/junction_counts/*csv.gz"
))
# Skip the "no_match" files. The test is made on the file name only so that a
# parent directory containing "no_match" cannot discard every file.
files2 <- files2[!str_detect(basename(files2), "no_match")]

# Collect the per-file tables in a pre-sized list and bind them once at the
# end instead of growing the data frame on every iteration.
junction_tables <- vector("list", length(files2))
for (file_index in seq_along(files2)) {
  file <- files2[file_index]
  print(file)

  # File names look like "F<number>_R<number>.csv.gz".
  file_name <- basename(file)
  forward_number <- as.numeric(str_sub(word(file_name, 1, sep = "_"), 2, -1))
  reverse_number <- as.numeric(
    str_sub(word(word(file_name, 2, sep = "_"), 1, sep = "\\."), 2, -1)
  )

  df <- data.frame(read_csv(file)) %>%
    mutate(
      f_bc = forward_number,
      r_bc = reverse_number,
      # Junctions are ";"-separated "start-end" pairs.
      last_junction = word(junctions, -1, sep = ";"),
      last_intron_length = as.numeric(word(last_junction, 2, sep = "-")) -
        as.numeric(word(last_junction, 1, sep = "-")),
      first_junction = word(junctions, 1, sep = ";"),
      second_junction = word(junctions, 2, sep = ";"),
      n_introns = str_count(junctions, ";") + 1
    )

  junction_tables[[file_index]] <- df
}
full_junc_df <- bind_rows(junction_tables)
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Gluc/junction_counts/F1_R1.csv.gz"
## Rows: 324 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Gluc/junction_counts/F1_R2.csv.gz"
## Rows: 1391 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Gluc/junction_counts/F1_R3.csv.gz"
## Rows: 713 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Gluc/junction_counts/F1_R4.csv.gz"
## Rows: 841 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Gluc/junction_counts/F1_R5.csv.gz"
## Rows: 1545 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Gluc/junction_counts/F1_R6.csv.gz"
## Rows: 1963 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Gluc/junction_counts/F1_R7.csv.gz"
## Rows: 1613 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Gluc/junction_counts/F2_R1.csv.gz"
## Rows: 715 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Gluc/junction_counts/F2_R2.csv.gz"
## Rows: 2862 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Gluc/junction_counts/F2_R3.csv.gz"
## Rows: 2032 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Gluc/junction_counts/F2_R4.csv.gz"
## Rows: 2362 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Gluc/junction_counts/F2_R5.csv.gz"
## Rows: 3566 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Gluc/junction_counts/F2_R6.csv.gz"
## Rows: 4072 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Gluc/junction_counts/F2_R7.csv.gz"
## Rows: 4069 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Gluc/junction_counts/F3_R1.csv.gz"
## Rows: 440 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Gluc/junction_counts/F3_R2.csv.gz"
## Rows: 306 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Gluc/junction_counts/F3_R3.csv.gz"
## Rows: 1305 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Gluc/junction_counts/F3_R4.csv.gz"
## Rows: 1169 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Gluc/junction_counts/F3_R5.csv.gz"
## Rows: 2504 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Gluc/junction_counts/F3_R6.csv.gz"
## Rows: 2953 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Gluc/junction_counts/F3_R7.csv.gz"
## Rows: 2488 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Gluc/junction_counts/F4_R1.csv.gz"
## Rows: 779 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Gluc/junction_counts/F4_R2.csv.gz"
## Rows: 2109 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Gluc/junction_counts/F4_R3.csv.gz"
## Rows: 1083 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Gluc/junction_counts/F4_R4.csv.gz"
## Rows: 2206 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Gluc/junction_counts/F4_R5.csv.gz"
## Rows: 3351 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Gluc/junction_counts/F4_R6.csv.gz"
## Rows: 3962 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Gluc/junction_counts/F4_R7.csv.gz"
## Rows: 3568 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Gluc/junction_counts/F5_R1.csv.gz"
## Rows: 182 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Gluc/junction_counts/F5_R2.csv.gz"
## Rows: 2952 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Gluc/junction_counts/F5_R3.csv.gz"
## Rows: 1878 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Gluc/junction_counts/F5_R4.csv.gz"
## Rows: 1763 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Gluc/junction_counts/F5_R5.csv.gz"
## Rows: 3183 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Gluc/junction_counts/F5_R6.csv.gz"
## Rows: 3607 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Gluc/junction_counts/F5_R7.csv.gz"
## Rows: 3535 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Gluc/junction_counts/F6_R1.csv.gz"
## Rows: 591 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Gluc/junction_counts/F6_R2.csv.gz"
## Rows: 2318 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Gluc/junction_counts/F6_R3.csv.gz"
## Rows: 2128 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Gluc/junction_counts/F6_R4.csv.gz"
## Rows: 2036 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Gluc/junction_counts/F6_R5.csv.gz"
## Rows: 3118 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Gluc/junction_counts/F6_R6.csv.gz"
## Rows: 3352 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Gluc/junction_counts/F6_R7.csv.gz"
## Rows: 4561 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Analyse whether reads primarily align to the reference expected for their
# reverse barcode (r_bc), and compute the percentage of reads per
# (f_bc, r_bc) combination that are productively spliced.
#
# Fix: the earlier version called unique() after dropping the junction and
# position columns. Each row carries a read count, so distinct rows that merely
# shared the same `number_of_reads` (and productive status) were collapsed into
# one and their reads were lost from the percentage. Reads are now summed over
# every retained row.
full_junc_df2 <- full_junc_df %>%
  # Reference sequence each reverse barcode is expected to align to
  mutate(
    expected_rname = case_when(
      r_bc == 1 ~ "design1_Gluc",
      r_bc == 2 ~ "Gluc_prepared",
      r_bc == 3 ~ "Design2_Gluc_A2",
      r_bc == 4 ~ "Design2_Gluc_A3",
      r_bc == 5 ~ "Design2_Gluc_A5",
      r_bc == 6 ~ "Design2_Gluc_A6",
      r_bc == 7 ~ "Design2_Gluc_A7"
    )
  ) %>%
  filter(reference == expected_rname) %>%
  # Drop two specific barcode combinations
  # NOTE(review): presumably failed wells - confirm against the plate layout
  filter(!(r_bc == 1 & f_bc == 5)) %>%
  filter(!(r_bc == 2 & f_bc == 3)) %>%
  filter(last_intron_length == 94) %>%
  # A read group is productive when its junction string exactly matches the
  # expected junction string for that construct
  mutate(
    productively_spliced = junctions == case_when(
      r_bc == 1 ~ "106-270;357-586;1248-1342",
      r_bc == 2 ~ "605-699",
      r_bc == 3 ~ "252-416;477-706;996-1090",
      r_bc == 4 ~ "252-416;477-706;996-1090",
      r_bc == 5 ~ "252-416;477-706;996-1090",
      r_bc == 6 ~ "252-416;477-715;1005-1099",
      r_bc == 7 ~ "252-416;477-715;1005-1099"
    )
  ) %>%
  filter(!str_detect(flag_string, "not primary|supplementary")) %>%
  # expected_rname is fully determined by r_bc, so adding it to the grouping
  # does not change the groups; "drop_last" leaves the result grouped by
  # f_bc and r_bc, as before
  group_by(f_bc, r_bc, expected_rname) %>%
  summarise(
    perc_productive = 100 *
      sum(as.numeric(productively_spliced) * number_of_reads) /
      sum(number_of_reads),
    .groups = "drop_last"
  ) %>%
  # Even forward barcodes are labelled shTDP, odd ones NT
  mutate(treatment = ifelse(f_bc %% 2 == 0, "shTDP", "NT"))
# Summarise the per-well percentages into a mean and SD per construct and
# treatment, and attach display names for plotting.
#
# The grouping variables are now selected explicitly instead of relying on
# dplyr to re-add them (which emitted "Adding missing grouping variables").
# The resulting columns and their order are unchanged.
nano_summary <- full_junc_df2 %>%
  ungroup() %>%
  mutate(
    good_name = case_when(
      r_bc == 1 ~ "Upstream",
      r_bc == 2 ~ "Constitutive",
      r_bc == 3 ~ "TDP-REGv2\n#1",
      r_bc == 4 ~ "TDP-REGv2\n#2",
      r_bc == 5 ~ "TDP-REGv2\n#3",
      r_bc == 6 ~ "TDP-REGv2\n#4",
      r_bc == 7 ~ "TDP-REGv2\n#5"
    ),
    # Names used on the figure axes
    paper_name = case_when(
      r_bc == 1 ~ "TDP-REGv1",
      r_bc == 2 ~ "+ve",
      r_bc == 3 ~ "TDP-REGv2\n#1",
      r_bc == 4 ~ "TDP-REGv2\n#2",
      r_bc == 5 ~ "TDP-REGv2\n#3",
      r_bc == 6 ~ "TDP-REGv2\n#4",
      r_bc == 7 ~ "TDP-REGv2\n#5"
    )
  ) %>%
  group_by(treatment, good_name) %>%
  mutate(
    mean_perc = mean(perc_productive),
    sd_perc = sd(perc_productive)
  ) %>%
  select(treatment, good_name, mean_perc, sd_perc, paper_name) %>%
  distinct() %>%
  ungroup() %>%
  group_by(good_name) %>%
  # Unsigned fold change: larger mean over smaller mean, whichever treatment
  # each belongs to
  mutate(log2fc = log2(max(mean_perc) / min(mean_perc)))
## Adding missing grouping variables: `treatment`, `good_name`
# Dodged bar chart of the mean % productively spliced reads for every
# construct, with error bars of +/- one SD, coloured by treatment.
ggplot(
  data = nano_summary,
  mapping = aes(x = paper_name, y = mean_perc, fill = treatment)
) +
  geom_col(position = "dodge") +
  geom_errorbar(
    mapping = aes(ymin = mean_perc - sd_perc, ymax = mean_perc + sd_perc),
    position = "dodge"
  ) +
  labs(x = "", y = "% productively\nspliced") +
  theme_classic() +
  ggeasy::easy_add_legend_title("Treatment") +
  ggsci::scale_fill_npg()
# No plot object is passed, so ggsave() writes the last plot displayed
ggsave(
  "markdown_images/gluc/all_gluc_nanopore_results.pdf",
  height = 9, width = 19, units = "cm"
)

# Same chart restricted to two constructs; stored for the combined figure
nano_plot <- ggplot(
  data = filter(nano_summary, paper_name %in% c("TDP-REGv1", "TDP-REGv2\n#5")),
  mapping = aes(x = paper_name, y = mean_perc, fill = treatment)
) +
  geom_col(position = "dodge") +
  geom_errorbar(
    mapping = aes(ymin = mean_perc - sd_perc, ymax = mean_perc + sd_perc),
    position = "dodge"
  ) +
  labs(x = "", y = "% productively\nspliced") +
  theme_classic() +
  ggeasy::easy_add_legend_title("Treatment") +
  ggsci::scale_fill_npg()
# Load the luciferase plate readout (one line per plate row, one column per
# plate column 1-12) and reshape it to one line per well.
df <- paste0(data_dir, "Gluc/oscargluc 23 june 2022.csv") %>%
  read_csv() %>%
  pivot_longer(cols = all_of(as.character(1:12))) %>%
  mutate(column = as.numeric(name)) %>%
  select(-name) %>%
  # Keep plate columns 1-6 only, and drop well B3 (remove bad well)
  filter(column < 7, !(column == 3 & row == "B")) %>%
  # Even plate columns are labelled shTDP, odd ones NT
  mutate(
    treatment = factor(
      ifelse(column %% 2 == 0, "shTDP", "NT"),
      levels = c("NT", "shTDP")
    )
  ) %>%
  # Mean and SD of the wells sharing a plate row and treatment; the result
  # stays grouped by treatment and row
  group_by(treatment, row) %>%
  mutate(
    av = mean(value),
    sd = sd(value)
  )
## Rows: 8 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): row
## dbl (12): 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Lookup from plate row to construct name. `clear_name` is a factor whose
# level order sets the display order of the constructs.
name_df <- data.frame(
  row = c("A", "B", "C", "D", "E", "F", "G", "H"),
  names = c("Design1", "Positive", "A2", "A3", "A5", "A6", "A7", "mScarlet"),
  clear_name = factor(
    c("TDP-REGv1", "Constitutive", "TDP-REGv2\n#1", "TDP-REGv2\n#2",
      "TDP-REGv2\n#3", "TDP-REGv2\n#4", "TDP-REGv2\n#5", "mScarlet"),
    levels = c("Constitutive", "mScarlet", "TDP-REGv1", "TDP-REGv2\n#1",
               "TDP-REGv2\n#2", "TDP-REGv2\n#3", "TDP-REGv2\n#4",
               "TDP-REGv2\n#5")
  )
)

# One line per construct and treatment with its mean (`av`) and SD, plus the
# log2 fold change of the shTDP mean over the NT mean for each construct.
# The join key is given explicitly so the join cannot silently pick up any
# other column the two tables might come to share.
summary <- df %>%
  left_join(name_df, by = "row") %>%
  ungroup() %>%
  select(clear_name, treatment, av, sd) %>%
  unique() %>%
  group_by(clear_name) %>%
  # Broadcast each treatment's mean to both rows of the construct: lines from
  # the other treatment contribute 0, so max() picks out the wanted mean
  mutate(
    shTDP_av = max(ifelse(treatment == "shTDP", av, 0)),
    nt_av = max(ifelse(treatment == "NT", av, 0))
  ) %>%
  mutate(log2fc = log2(shTDP_av / nt_av))
## Joining with `by = join_by(row)`
# Alternative display names for the four constructs kept for the figure.
# `clear_name3` is "" for the construct that the plot later filters out.
nice_names <- data.frame(
  clear_name = c("TDP-REGv2\n#3", "TDP-REGv2\n#5", "mScarlet", "TDP-REGv1"),
  clear_name2 = c("TDP-REGv2\n# 1", "TDP-REGv2\n# 2", "Negative",
                  "AARS1-\nbased"),
  clear_name3 = factor(
    c("", "TDP-REGv2\n#5", "Negative", "TDP-REGv1"),
    levels = c("TDP-REGv1", "TDP-REGv2\n#5", "Negative", "")
  )
)

# Restrict the summary to those constructs and attach the display names.
# The join key is given explicitly rather than inferred from shared columns.
summary3 <- filter(
  summary,
  clear_name %in% c("TDP-REGv2\n#3", "TDP-REGv2\n#5", "mScarlet", "TDP-REGv1")
) %>%
  left_join(nice_names, by = "clear_name")
## Joining with `by = join_by(clear_name)`
# Luciferase bar chart: mean luminescence (in units of 10,000) with +/- one SD
# error bars per construct and treatment. Constructs whose `clear_name3` is ""
# are left out.
p1 <- ggplot(
  data = filter(summary3, clear_name3 != ""),
  mapping = aes(x = clear_name3, y = av / 10000, fill = treatment)
) +
  geom_col(position = "dodge") +
  geom_errorbar(
    mapping = aes(ymin = (av - sd) / 10000, ymax = (av + sd) / 10000),
    position = "dodge"
  ) +
  # Optional extras kept from earlier drafts:
  # geom_text(aes(label = round(log2fc,2), y = 3000000)) +
  # ggtitle("TDP-43-regulated secreted luciferase signal") +
  # geom_text(aes(label = round(log2fc,2), y = 300)) +
  # ggeasy::easy_rotate_x_labels(side = "right")
  labs(x = "", y = "Luminescence\nx10,000") +
  ggpubr::theme_classic2() +
  ggeasy::easy_add_legend_title("Treatment") +
  ggsci::scale_fill_npg()
p1

# Nanopore panel (legend removed) next to the luciferase panel, the latter
# 1.6 times as wide
((nano_plot + ggeasy::easy_remove_legend()) | p1) +
  plot_layout(widths = c(1, 1.6))
# No plot object is passed, so ggsave() writes the last plot displayed
ggsave(
  "markdown_images/gluc/combined nanopore and luciferase.pdf",
  width = 19, height = 7, units = "cm"
)
This is just to show that the splicing is exactly as predicted
# Build per-position read coverage for two pileup files (F5_R7 = NT,
# F6_R7 = shTDP) so the splicing pattern can be plotted.
files3 <- Sys.glob(paste0(data_dir, "Gluc/pileup/*.csv.gz"))

# Get index of files for cols 5 and 6 in the plate for the best internal cryptic
nt <- which(str_detect(files3, "F5_R7"))
dox <- which(str_detect(files3, "F6_R7"))
# Fail early and clearly if either sample is missing or matched more than once
stopifnot(length(nt) == 1, length(dox) == 1)

# Read both files and stack them. Compared with the earlier loop:
#  * the sample label is computed outside the dplyr pipeline, because the
#    pileup table has its own `nt` column that would otherwise shadow the
#    file index `nt` inside data-masking verbs;
#  * the `nt` column is referred to as `.data$nt` for the same reason;
#  * the per-file tables are collected with lapply() and bound once instead of
#    growing a data frame inside a loop.
coverage_df <- bind_rows(lapply(c(nt, dox), function(i) {
  sample_label <- if (i == nt) "NT" else "shTDP"
  read_csv(files3[i]) %>%
    filter(insertion_number == 0) %>%
    # Ensure that we only look for the correct alignment: keep the reference
    # with the largest total count
    group_by(reference_name) %>%
    mutate(n_this_rname = sum(n)) %>%
    ungroup() %>%
    filter(n_this_rname == max(n_this_rname)) %>%
    # Keep whole-number positions only
    filter(position %% 1 == 0) %>%
    # Entries recorded as "del" contribute no coverage
    mutate(score = ifelse(.data$nt == "del", 0, n)) %>%
    group_by(position) %>%
    summarise(n_at_pos = sum(score), .groups = "drop") %>%
    mutate(
      sample = sample_label,
      # Coverage relative to the best-covered position in this file
      frac = n_at_pos / max(n_at_pos)
    )
}))
## Rows: 25482 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): nt
## dbl (5): reference_name, position, insertion_number, n, total_reads
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Adding missing grouping variables: `position`
## Rows: 33716 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): nt
## dbl (5): reference_name, position, insertion_number, n, total_reads
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Adding missing grouping variables: `position`
# Area plot of coverage along the reference, one panel per sample stacked
# vertically, each panel with its own y scale.
ggplot(
  data = ungroup(coverage_df),
  mapping = aes(x = position, y = n_at_pos)
) +
  geom_area() +
  facet_wrap(vars(sample), ncol = 1, scales = "free_y") +
  theme_void() +
  ggsci::scale_fill_npg()
# No size is given, so ggsave() uses the size of the current graphics device
ggsave("markdown_images/gluc/gluc_internal_5_pileups.pdf")
## Saving 7 x 5 in image
# Remove every object listed by ls() except `data_dir`, so the next section
# starts from a clean workspace.
rm(list=setdiff(ls(), "data_dir"))
We transduced mice with an AAV expressing mCherry. The images were manually quantified (with blinding).
# Per-animal cell counts for the mCherry AAV: section-level counts are summed
# per animal (`id`) and turned into fractions and category counts.
quants <- paste0(data_dir, "AAV/quantifications_from_jo_fixed.csv") %>%
  read_csv() %>%
  filter(!is.na(triple)) %>%
  # The input file spells this column "tdp_and_vacth"
  dplyr::rename(tdp_and_vacht = tdp_and_vacth) %>%
  # Compute some useful values (vacht is read as text, so convert it first)
  mutate(
    vacht = as.numeric(vacht),
    n_mcherry_no_vacht = mcherry - vacht_and_mcherry,
    n_vacht_no_tdp = vacht - tdp_and_vacht
  ) %>%
  # Compute summaries per animal
  group_by(id) %>%
  mutate(
    total_n_mcherry = sum(mcherry),
    total_n_vacht_and_mcherry = sum(vacht_and_mcherry),
    total_n_vacht = sum(vacht),
    total_n_vacht_and_tdp = sum(tdp_and_vacht),
    total_n_mcherry_no_vacht = sum(n_mcherry_no_vacht),
    total_n_vacht_no_tdp = sum(n_vacht_no_tdp),
    total_n_triple = sum(triple),
    total_n_tdp = sum(vacht - n_vacht_no_tdp)
  ) %>%
  # Collapse to one line per animal
  select(id, contains("total")) %>%
  distinct() %>%
  mutate(
    # Answer question 1:
    frac_vacht_with_TDP = total_n_vacht_and_tdp / total_n_vacht,
    # Answer question 2:
    frac_vacht_with_mcherry = total_n_vacht_and_mcherry / total_n_vacht,
    # Answer question 4:
    n_vacht_mcherry_and_tdp = total_n_triple,
    n_vacht_mcherry_no_tdp = total_n_vacht_and_mcherry - total_n_triple,
    n_vacht_tdp_no_mcherry = total_n_vacht_and_tdp - total_n_triple,
    n_vacht_no_mcherry_no_tdp = total_n_vacht - n_vacht_mcherry_and_tdp -
      n_vacht_mcherry_no_tdp - n_vacht_tdp_no_mcherry,
    genotype = if_else(id %in% c(29, 30, 31, 40, 206), "cKO", "Control")
  ) %>%
  ungroup() %>%
  # Add entry for cKO in which no fluorescence was seen
  bind_rows(data.frame(id = -1, genotype = "cKO")) %>%
  # cKO animals first, then number the animals in that order
  arrange(desc(genotype)) %>%
  mutate(label = row_number())
## Rows: 444 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): section, vacht
## dbl (5): id, mcherry, vacht_and_mcherry, tdp_and_vacth, triple
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Replace every missing value with 0 (the placeholder animal added without
# counts has NA in all count columns), then keep a copy for the combined figure.
quants[is.na(quants)] <- 0
quants_a244 <- quants

# One bar per animal showing the percentage of counted cells with mCherry,
# faceted by genotype. `scales` is spelled out in full; the earlier `scale =`
# only worked through partial argument matching.
ggplot(
  quants,
  aes(x = factor(label), y = 100 * frac_vacht_with_mcherry, fill = genotype)
) +
  geom_bar(stat = "identity") +
  facet_wrap(~factor(genotype, levels = c("Control", "cKO")),
             scales = "free_x") +
  ylab("% MNs with mCherry") +
  theme_classic() +
  ggsci::scale_fill_npg() +
  xlab("Animal number") +
  ggeasy::easy_add_legend_title("Genotype")
# No plot object is passed, so ggsave() writes the last plot displayed
ggsave("markdown_images/AAV/pc_mCherry_MNs.pdf",
       height = 4.5, width = 8, units = "cm")
# ggplot(quants, aes(x = factor(label), y = 100*frac_vacht_with_TDP,
# fill = genotype)) +
# geom_bar(stat="identity") +
# facet_wrap(~factor(genotype, levels = c("Control", "cKO")),
# scale = "free_x") +
# ylab("% MNs with TDP-43") +
# theme_classic() +
# ggsci::scale_fill_npg() +
# xlab("Animal number") +
# ggeasy::easy_add_legend_title("Genotype")
#
# ggsave("markdown_images/AAV/pc_TDP43_MNs.pdf", height = 4.5, width = 8, units="cm")
Now let’s do the same but for the internal cryptic mScarlet
# Per-animal cell counts for the a241 quantification file, processed the same
# way as the mCherry data: sum the section-level counts per animal (`id`),
# then derive fractions and category counts.
quants <- paste0(data_dir, "AAV/a241 quantifications.csv") %>%
  read_csv() %>%
  filter(!is.na(triple)) %>%
  # Compute some useful values. `mcherry` is set equal to the
  # vacht_and_mcherry count, so n_mcherry_no_vacht is 0 wherever that count
  # is present.
  mutate(
    mcherry = vacht_and_mcherry,
    vacht = as.numeric(vacht),
    n_mcherry_no_vacht = mcherry - vacht_and_mcherry,
    n_vacht_no_tdp = vacht - tdp_and_vacht
  ) %>%
  # Compute summaries per animal
  group_by(id) %>%
  mutate(
    total_n_mcherry = sum(mcherry),
    total_n_vacht_and_mcherry = sum(vacht_and_mcherry),
    total_n_vacht = sum(vacht),
    total_n_vacht_and_tdp = sum(tdp_and_vacht),
    total_n_mcherry_no_vacht = sum(n_mcherry_no_vacht),
    total_n_vacht_no_tdp = sum(n_vacht_no_tdp),
    total_n_triple = sum(triple),
    total_n_tdp = sum(vacht - n_vacht_no_tdp)
  ) %>%
  # Collapse to one line per animal
  select(id, contains("total")) %>%
  distinct() %>%
  mutate(
    # Answer question 1:
    frac_vacht_with_TDP = total_n_vacht_and_tdp / total_n_vacht,
    # Answer question 2:
    frac_vacht_with_mcherry = total_n_vacht_and_mcherry / total_n_vacht,
    # Answer question 4:
    n_vacht_mcherry_and_tdp = total_n_triple,
    n_vacht_mcherry_no_tdp = total_n_vacht_and_mcherry - total_n_triple,
    n_vacht_tdp_no_mcherry = total_n_vacht_and_tdp - total_n_triple,
    n_vacht_no_mcherry_no_tdp = total_n_vacht - n_vacht_mcherry_and_tdp -
      n_vacht_mcherry_no_tdp - n_vacht_tdp_no_mcherry,
    genotype = if_else(id %in% c(254, 256, 257, 283, 284), "cKO", "Control")
  ) %>%
  ungroup() %>%
  # Add entry for cKO in which no fluorescence was seen
  bind_rows(data.frame(id = 255, genotype = "cKO")) %>%
  # cKO animals first, then number the animals in that order
  arrange(desc(genotype)) %>%
  mutate(label = row_number())
## New names:
## Rows: 93 Columns: 10
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (1): section dbl (7): id, vacht, vacht_and_mcherry, tdp_and_vacht, triple, cKO,
## WT lgl (2): ...7, ...8
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...7`
## • `` -> `...8`
# Replace every missing value with 0 (the placeholder animal added without
# counts has NA in all count columns), then keep a copy for the combined figure.
quants[is.na(quants)] <- 0
quants_a241 <- quants

# One bar per animal showing the percentage of counted cells with the
# reporter, faceted by genotype. `scales` is spelled out in full; the earlier
# `scale =` only worked through partial argument matching.
# NOTE(review): the axis label still says mCherry although this section is
# described as the mScarlet vector - confirm the intended label.
ggplot(
  quants,
  aes(x = factor(label), y = 100 * frac_vacht_with_mcherry, fill = genotype)
) +
  geom_bar(stat = "identity") +
  facet_wrap(~factor(genotype, levels = c("Control", "cKO")),
             scales = "free_x") +
  ylab("% MNs with mCherry") +
  theme_classic() +
  ggsci::scale_fill_npg() +
  xlab("Animal number") +
  ggeasy::easy_add_legend_title("Genotype")
# Save under its own file name. This call used to write to
# "markdown_images/AAV/pc_mCherry_MNs.pdf", the same path as the figure for
# the mCherry vector, and therefore overwrote that figure.
ggsave("markdown_images/AAV/pc_mScarlet_MNs_a241.pdf",
       height = 4.5, width = 8, units = "cm")
# ggplot(quants, aes(x = factor(label), y = 100*frac_vacht_with_TDP,
# fill = genotype)) +
# geom_bar(stat="identity") +
# facet_wrap(~factor(genotype, levels = c("Control", "cKO")),
# scale = "free_x") +
# ylab("% MNs with TDP-43") +
# theme_classic() +
# ggsci::scale_fill_npg() +
# xlab("Animal number") +
# ggeasy::easy_add_legend_title("Genotype")
And now the same for the positive control vector
# Per-animal cell counts for the positive control vector. The animal
# identifier column in this file is spelled `Aminal_ID`, and every animal is
# labelled "Control".
quants <- paste0(data_dir, "AAV/A245_quantifications3.csv") %>%
  read_csv() %>%
  filter(!is.na(triple)) %>%
  # Compute some useful values. `mcherry` is set equal to the
  # vacht_and_mcherry count, so n_mcherry_no_vacht is 0 wherever that count
  # is present.
  mutate(
    mcherry = vacht_and_mcherry,
    vacht = as.numeric(vacht),
    n_mcherry_no_vacht = mcherry - vacht_and_mcherry,
    n_vacht_no_tdp = vacht - tdp_and_vacht
  ) %>%
  # Compute summaries per animal
  group_by(Aminal_ID) %>%
  mutate(
    total_n_mcherry = sum(mcherry),
    total_n_vacht_and_mcherry = sum(vacht_and_mcherry),
    total_n_vacht = sum(vacht),
    total_n_vacht_and_tdp = sum(tdp_and_vacht),
    total_n_mcherry_no_vacht = sum(n_mcherry_no_vacht),
    total_n_vacht_no_tdp = sum(n_vacht_no_tdp),
    total_n_triple = sum(triple),
    total_n_tdp = sum(vacht - n_vacht_no_tdp)
  ) %>%
  # Collapse to one line per animal
  select(Aminal_ID, contains("total")) %>%
  distinct() %>%
  mutate(
    # Answer question 1:
    frac_vacht_with_TDP = total_n_vacht_and_tdp / total_n_vacht,
    # Answer question 2:
    frac_vacht_with_mcherry = total_n_vacht_and_mcherry / total_n_vacht,
    # Answer question 4:
    n_vacht_mcherry_and_tdp = total_n_triple,
    n_vacht_mcherry_no_tdp = total_n_vacht_and_mcherry - total_n_triple,
    n_vacht_tdp_no_mcherry = total_n_vacht_and_tdp - total_n_triple,
    n_vacht_no_mcherry_no_tdp = total_n_vacht - n_vacht_mcherry_and_tdp -
      n_vacht_mcherry_no_tdp - n_vacht_tdp_no_mcherry,
    genotype = "Control"
  ) %>%
  ungroup() %>%
  # Same ordering and numbering steps as for the other vectors
  arrange(desc(genotype)) %>%
  mutate(label = row_number())
## Rows: 40 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): Section
## dbl (5): Aminal_ID, vacht, vacht_and_mcherry, tdp_and_vacht, triple
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Keep a copy for the combined figure
quants_a245 <- quants

# One bar per animal: percentage of counted cells with the reporter.
# `scales` is spelled out in full in both plots; the earlier `scale =` only
# worked through partial argument matching.
ggplot(
  quants,
  aes(x = factor(label), y = 100 * frac_vacht_with_mcherry, fill = genotype)
) +
  geom_bar(stat = "identity") +
  facet_wrap(~factor(genotype, levels = c("Control", "cKO")),
             scales = "free_x") +
  ylab("% MNs with mCherry") +
  theme_classic() +
  ggsci::scale_fill_npg() +
  xlab("Animal number") +
  ggeasy::easy_add_legend_title("Genotype")
#ggsave("markdown_images/AAV/pc_mCherry_MNs.pdf", height = 4.5, width = 8, units="cm")

# One bar per animal: percentage of counted cells with TDP-43
ggplot(
  quants,
  aes(x = factor(label), y = 100 * frac_vacht_with_TDP, fill = genotype)
) +
  geom_bar(stat = "identity") +
  facet_wrap(~factor(genotype, levels = c("Control", "cKO")),
             scales = "free_x") +
  ylab("% MNs with TDP-43") +
  theme_classic() +
  ggsci::scale_fill_npg() +
  xlab("Animal number") +
  ggeasy::easy_add_legend_title("Genotype")
Now let’s combine the quantifications for all three vectors
# Stack the per-animal tables of the three vectors, tagging each with its
# vector name. Factor levels fix the panel order and the genotype order.
combined_quants <- bind_rows(
  quants_a244 %>% mutate(vector = "TDP-REGv1\nmCherry"),
  quants_a241 %>% mutate(vector = "TDP-REGv2\nmScarlet"),
  quants_a245 %>% mutate(vector = "+ve\nmScarlet")
) %>%
  mutate(
    vector = factor(
      vector,
      levels = c("TDP-REGv1\nmCherry", "TDP-REGv2\nmScarlet", "+ve\nmScarlet")
    ),
    genotype = factor(genotype, levels = c("Control", "cKO"))
  )

# Dot plot: one dot per animal, one panel per vector
ggplot(
  combined_quants,
  aes(x = genotype, y = 100 * frac_vacht_with_mcherry, fill = genotype)
) +
  geom_dotplot(binaxis = "y", stackdir = "center", binwidth = 4,
               dotsize = 1.1) +
  facet_wrap(~vector, scales = "free_x") +
  ylab("% MNs RFP +ve") +
  ggpubr::theme_classic2() +
  xlab("") +
  ggeasy::easy_add_legend_title("Genotype") +
  ggsci::scale_fill_npg()

# Beeswarm version of the same figure. The points are mapped to `colour`, so
# the NPG palette must be applied with the colour scale; the fill scale used
# before had no effect and the points got ggplot2's default colours.
ggplot(
  combined_quants,
  aes(x = genotype, y = 100 * frac_vacht_with_mcherry, colour = genotype)
) +
  #geom_quasirandom(varwidth = TRUE) +
  geom_beeswarm(cex = 4, method = "compactswarm", size = 1.8, alpha = 0.8) +
  facet_wrap(~vector, scales = "free_x") +
  ylab("% MNs RFP +ve") +
  ggpubr::theme_classic2() +
  xlab("") +
  ggeasy::easy_add_legend_title("Genotype") +
  ggsci::scale_colour_npg()
# No plot object is passed, so ggsave() writes the last plot displayed
ggsave("markdown_images/AAV/pc_TDP43_MNs_combined_quants_incl_a245.pdf",
       height = 5.5, width = 12, units = "cm")
# ggplot(combined_quants, aes(x = genotype, y = 100*frac_vacht_with_TDP,
# fill = genotype)) +
# geom_dotplot(binaxis = "y", stackdir = "center") +
# facet_wrap(~factor(genotype, levels = c("Control", "cKO")),
# scale = "free_x") +
# ylab("% MNs with TDP-43") +
# theme_classic() +
# ggsci::scale_fill_npg() +
# xlab("Animal number") +
# ggeasy::easy_add_legend_title("Genotype") +
# facet_wrap(~vector)
We transfected various mScarlet reporters into HEK293T cells with either SNAP-12QN-TDP-43 or just a plain Halo tag not attached to anything. We added a little bit of mGreenLantern plasmid to all of them too, just to check that the transfection worked. Note that the first two rows went a little bit wrong during transfection - volume was lost, so the last couple didn’t work.
Also, it seems like the Design 1 AARS1-mCherry transfection failed. The plasmid had been stored at 4 degrees (instead of frozen) for an extended period so I assume this is why. I saw similar problems in another transfection.
We’ll ignore this row.
# Assemble Incucyte images into a single tiled overview image.
#
# Each well occupies a 2 x 2 block of images; wells are laid out on a grid of
# `n_rows` x `n_columns` wells.
#
# Arguments:
#   combined_positions  data frame with columns `well`, `Plate` and `Order`
#                       (`Order` is the grid row the well is drawn in). If it
#                       also has a `replicate` column, the image's position
#                       within its well is looked up by `filename`; otherwise
#                       it is parsed from the third "_"-separated field of the
#                       file name.
#   all_images          character vector of image file paths.
#   n_rows, n_columns   grid size, in wells.
#   dim_image           side length (pixels) each image is resized to.
#   spacing_images      gap (pixels) between the images within a well.
#   spacing_wells       gap (pixels) between neighbouring wells.
#   background_quantile quantile of the background-subtracted canvas used as
#                       the scaling denominator.
#
# Returns an EBImage Image holding 1 - t(scaled canvas), i.e. an inverted image.
#
# Currently it only works when you have four images per well (named
# "replicates"): 2 and 4 go in the right-hand column, 3 and 4 in the bottom row.
# NOTE(review): the plate column is always folded onto 1..6, regardless of
# `n_columns` - confirm before calling with n_columns != 6.
generate_full_image <- function(combined_positions, all_images, n_rows,
                                n_columns=6, dim_image=200, spacing_images=5,
                                spacing_wells=30, background_quantile=0.97){
  well_size <- 2*dim_image + spacing_images
  height <- n_rows*well_size + (n_rows-1)*spacing_wells + 1
  width <- n_columns*well_size + (n_columns-1)*spacing_wells + 1
  full_image <- matrix(nrow = height, ncol = width, 0)
  background <- 10000000 # set arbitrarily large initial value

  for (image in all_images) {
    filename <- word(image, sep="/", start=-1, end=-1)
    # Plate is kept as a string: "2" when the path mentions the control plate.
    plate <- if (str_detect(image, "control plate")) "2" else "1"
    well <- word(filename, sep="_", start=2, end=2)
    plate_column <- as.numeric(str_sub(well, 2, 3))
    # Fold plate columns onto 1..6 (6, 12, ... map to 6).
    image_column <- (plate_column - 1) %% 6 + 1

    image_row <- unique(combined_positions$Order[
      which(combined_positions$well == well &
              combined_positions$Plate == plate)])
    # Wells that are absent from combined_positions (no match at all) or that
    # have no Order assigned are skipped rather than crashing the `if`.
    if (length(image_row) == 0) {
      next
    }
    if (length(image_row) > 1) {
      stop("Well ", well, " on plate ", plate,
           " maps to more than one Order in combined_positions", call. = FALSE)
    }
    if (is.na(image_row)) {
      next
    }

    if ("replicate" %in% colnames(combined_positions)) {
      replicate <- combined_positions$replicate[which(combined_positions$filename == filename)]
      if (length(replicate) == 0) {
        stop("No `replicate` entry in combined_positions for file ", filename,
             call. = FALSE)
      }
    } else {
      replicate <- as.numeric(word(filename, sep="_", start=3, end=3))
    }

    image_matrix <- readImage(image)
    smaller <- as.array(EBImage::resize(image_matrix, w=dim_image, h=dim_image))

    # Top-left corner of the well, then of this image within the well.
    well_top_left_x <- (image_column - 1)*(well_size + spacing_wells) + 1
    image_top_left_x <- well_top_left_x + ifelse(replicate %in% c(2,4), dim_image + spacing_images, 0)
    well_top_left_y <- (image_row - 1)*(well_size + spacing_wells) + 1
    image_top_left_y <- well_top_left_y + ifelse(replicate %in% c(3,4), dim_image + spacing_images, 0)

    full_image[image_top_left_y:(image_top_left_y + dim_image - 1),
               image_top_left_x:(image_top_left_x + dim_image - 1)] <- smaller

    # Track the lowest 5th-percentile intensity seen across all placed images.
    this_background <- quantile(smaller, 0.05)
    if (this_background < background) {
      background <- this_background
    }
  }

  # Subtract the shared background, scale by the requested quantile, then
  # transpose and invert.
  full_image2 <- full_image - background
  img_invert <- Image(1 - t(full_image2/quantile(full_image2, background_quantile)))
  return(img_invert)
}
# Red-channel Incucyte images for the 12QN experiment; keep only the files
# whose name contains "VID411_D".
red_12qn <- Sys.glob(paste0(data_dir, "12qn from incucyte/red/*"))
just_a11 <- red_12qn[which(str_detect(red_12qn, "VID411_D"))]

# Layout table for generate_full_image(): one entry per file, restricted to
# plate row D, which is drawn as the single row (Order = 1) of the montage.
positions <- data.frame(filename = word(red_12qn, -1, sep="/")) %>%
  mutate(well = word(filename, 2, sep="_"),
         replicate = as.numeric(word(filename, 3, sep="_"))) %>%
  mutate(row = str_sub(well, 1, 1),
         # Read up to two digits so that columns 10-12 are not truncated to 1.
         column = as.numeric(str_sub(well, 2, 3))) %>%
  left_join(data.frame(row = c("A", "B", 'C', 'D', 'E', 'F', 'G', 'H'),
                       Order = c(1, 2, 3, 4, 5, 6, 7, 8))) %>%
  mutate(plate_column = column) %>%
  mutate(Plate = 1) %>%
  filter(row == "D") %>%
  mutate(Order = 1) %>%
  # Swap replicates 2 and 3, which changes where they are placed in the well.
  mutate(replicate = case_when(replicate == 2 ~ 3,
                               replicate == 3 ~ 2,
                               TRUE ~ replicate))
## Joining with `by = join_by(row)`
# Red-channel montage of row D.
im <- generate_full_image(positions, just_a11, n_rows = 1, background_quantile = 0.995)

# Green-channel images. The selection mask must be built from the green file
# list itself; building it from the red list only works if both directories
# happen to contain identically ordered files.
green_12qn <- Sys.glob(paste0(data_dir, "12qn from incucyte/green/*"))
just_a11_green <- green_12qn[which(str_detect(green_12qn, "VID411_D"))]

# Layout table for the green images, built the same way as for the red ones.
positions_green <- data.frame(filename = word(green_12qn, -1, sep="/")) %>%
  mutate(well = word(filename, 2, sep="_"),
         replicate = as.numeric(word(filename, 3, sep="_"))) %>%
  mutate(row = str_sub(well, 1, 1),
         # Read up to two digits so that columns 10-12 are not truncated to 1.
         column = as.numeric(str_sub(well, 2, 3))) %>%
  left_join(data.frame(row = c("A", "B", 'C', 'D', 'E', 'F', 'G', 'H'),
                       Order = c(1, 2, 3, 4, 5, 6, 7, 8))) %>%
  mutate(plate_column = column) %>%
  mutate(Plate = 1) %>%
  filter(row == "D") %>%
  mutate(Order = 1) %>%
  # Swap replicates 2 and 3, which changes where they are placed in the well.
  mutate(replicate = case_when(replicate == 2 ~ 3,
                               replicate == 3 ~ 2,
                               TRUE ~ replicate))
## Joining with `by = join_by(row)`
# Green-channel montage of row D.
im_green <- generate_full_image(
  positions_green,
  just_a11_green,
  n_rows = 1,
  background_quantile = 0.97
)

# generate_full_image() returns an inverted image, so `1 - image` is used to
# undo the inversion before placing it in a single colour channel.
rgb_red <- rgbImage(red = 1 - im)
display(rgb_red)
writeImage(rgb_red, "markdown_images/12QN incucyte/red_a11.png", quality=99)

rgb_green <- rgbImage(green = 1 - im_green)
display(rgb_green)
writeImage(rgb_green, "markdown_images/12QN incucyte/green_a11.png", quality=99)
# Read every pileup CSV (excluding the "no_match" files) and stack them into
# `all_df`, tagging each row with the file it came from.
all_files <- Sys.glob(paste0(data_dir, "Prime editing/pileup 8th nov 22/*.csv.gz"))
all_files <- all_files[!str_detect(all_files, "no_match")]

# Collect the per-file tables in a pre-allocated list and bind once at the end.
# Empty files leave their slot as NULL, which bind_rows() ignores, so the result
# no longer depends on the first file being non-empty.
pileup_list <- vector("list", length(all_files))
for (i in seq_along(all_files)) {
  file <- all_files[i]
  print(file)
  code <- word(file, -1, -1, sep="/")
  this_df <- read_csv(file) %>%
    mutate(filename = code)
  if (nrow(this_df) > 0) {
    pileup_list[[i]] <- this_df
  }
}
all_df <- bind_rows(pileup_list)
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Prime editing/pileup 8th nov 22/barcodes_40nt_bc_F_11_40nt_bc_R_1.csv.gz"
## Rows: 12885 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): nt
## dbl (5): reference_name, position, insertion_number, n, total_reads
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Prime editing/pileup 8th nov 22/barcodes_40nt_bc_F_11_40nt_bc_R_2.csv.gz"
## Rows: 13622 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): nt
## dbl (5): reference_name, position, insertion_number, n, total_reads
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Prime editing/pileup 8th nov 22/barcodes_40nt_bc_F_11_40nt_bc_R_3.csv.gz"
## Rows: 13359 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): nt
## dbl (5): reference_name, position, insertion_number, n, total_reads
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Prime editing/pileup 8th nov 22/barcodes_40nt_bc_F_11_40nt_bc_R_4.csv.gz"
## Rows: 11500 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): nt
## dbl (5): reference_name, position, insertion_number, n, total_reads
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Prime editing/pileup 8th nov 22/barcodes_40nt_bc_F_11_40nt_bc_R_5.csv.gz"
## Rows: 19310 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): nt
## dbl (5): reference_name, position, insertion_number, n, total_reads
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Prime editing/pileup 8th nov 22/barcodes_40nt_bc_F_11_40nt_bc_R_6.csv.gz"
## Rows: 24537 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): nt
## dbl (5): reference_name, position, insertion_number, n, total_reads
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Prime editing/pileup 8th nov 22/barcodes_40nt_bc_F_11_40nt_bc_R_7.csv.gz"
## Rows: 18299 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): nt
## dbl (5): reference_name, position, insertion_number, n, total_reads
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Prime editing/pileup 8th nov 22/barcodes_40nt_bc_F_11_40nt_bc_R_8.csv.gz"
## Rows: 19410 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): nt
## dbl (5): reference_name, position, insertion_number, n, total_reads
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Prime editing/pileup 8th nov 22/barcodes_40nt_bc_F_12_40nt_bc_R_1.csv.gz"
## Rows: 20661 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): nt
## dbl (5): reference_name, position, insertion_number, n, total_reads
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Prime editing/pileup 8th nov 22/barcodes_40nt_bc_F_12_40nt_bc_R_2.csv.gz"
## Rows: 16023 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): nt
## dbl (5): reference_name, position, insertion_number, n, total_reads
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Prime editing/pileup 8th nov 22/barcodes_40nt_bc_F_12_40nt_bc_R_3.csv.gz"
## Rows: 20435 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): nt
## dbl (5): reference_name, position, insertion_number, n, total_reads
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Prime editing/pileup 8th nov 22/barcodes_40nt_bc_F_12_40nt_bc_R_4.csv.gz"
## Rows: 16731 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): nt
## dbl (5): reference_name, position, insertion_number, n, total_reads
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Prime editing/pileup 8th nov 22/barcodes_40nt_bc_F_12_40nt_bc_R_5.csv.gz"
## Rows: 22952 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): nt
## dbl (5): reference_name, position, insertion_number, n, total_reads
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Prime editing/pileup 8th nov 22/barcodes_40nt_bc_F_12_40nt_bc_R_6.csv.gz"
## Rows: 27569 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): nt
## dbl (5): reference_name, position, insertion_number, n, total_reads
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Prime editing/pileup 8th nov 22/barcodes_40nt_bc_F_12_40nt_bc_R_7.csv.gz"
## Rows: 21558 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): nt
## dbl (5): reference_name, position, insertion_number, n, total_reads
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data/Prime editing/pileup 8th nov 22/barcodes_40nt_bc_F_12_40nt_bc_R_8.csv.gz"
## Rows: 20836 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): nt
## dbl (5): reference_name, position, insertion_number, n, total_reads
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Restrict the pileup to reference 0 and, for every file / position /
# insertion_number, express each nucleotide count as a fraction of the total.
# The barcode numbers parsed from the file name give treatment and plasmid.
just_unc <- all_df %>%
  filter(reference_name == 0) %>%
  arrange(filename, position) %>%
  group_by(filename, position, insertion_number) %>%
  mutate(
    n_this_pos = sum(n),
    frac_this_pos = n / n_this_pos
  ) %>%
  ungroup() %>%
  mutate(
    # 5th "_"-separated field: forward barcode number
    f_bc = as.numeric(word(filename, 5, 5, sep="_")),
    # 9th field when splitting on "_" or ".": reverse barcode number
    r_bc = as.numeric(word(filename, 9, 9, sep="_|\\.")),
    treatment = ifelse(f_bc == 11, "Control", "shTDP"),
    Plasmid = ifelse(r_bc <= 4, "12C", "PEMax")
  )
# G/C calls at position 561 (no insertion). `frac_c` is the fraction of C at
# that position for each file, or 0 when the file has no C row at all.
# The result stays grouped by filename.
peg10_561 <- just_unc %>%
  filter(
    position == 561,
    nt %in% c("G", "C"),
    insertion_number == 0
  ) %>%
  group_by(filename) %>%
  mutate(
    frac_c = if (any(nt == "C")) max(frac_this_pos[nt == "C"]) else 0,
    new_name = ifelse(Plasmid == "PEMax", "Constitutive", "Cryptic")
  )
# One row per distinct (Plasmid, treatment, frac_c), annotated with the group
# mean and standard deviation of frac_c expressed as percentages.
peg10_561_summary2 <- peg10_561 %>%
  ungroup() %>%
  dplyr::select(Plasmid, treatment, frac_c) %>%
  group_by(Plasmid, treatment) %>%
  mutate(
    mean_p = 100 * mean(frac_c),
    sd_p = 100 * sd(frac_c)
  ) %>%
  unique() %>%
  mutate(new_name = ifelse(Plasmid == "PEMax", "+ve\nPEMax", "Cryptic\nPEMax"))
# Dot plot of 100 * frac_c per construct, dodged by treatment.
ggplot(
  peg10_561_summary2,
  aes(x = new_name, y = 100*frac_c, fill = treatment)
) +
  geom_dotplot(
    binaxis = "y",
    stackdir = "center",
    position = "dodge",
    binwidth = 0.5,
    dotsize = 2
  ) +
  ggsci::scale_color_npg() +
  ggsci::scale_fill_npg() +
  #geom_errorbar(position="dodge", aes(ymin = mean_p-sd_p, ymax = mean_p+sd_p)) +
  #ggtitle("Editing of UNC13A cryptic\ndonor splice site") +
  labs(x = "", y = "% Edited") +
  ggpubr::theme_pubclean() +
  ggeasy::easy_add_legend_title("Treatment") +
  # must come after the complete theme so the legend position is not reset
  ggeasy::easy_move_legend("right")
ggsave("markdown_images/PE/dotplot_nanopore_summary.pdf", height = 6.5, width = 8, units = "cm")
These datasets are from a capillary electrophoresis machine (“QIAxcel”).
The below R scripts use the raw data from this machine to produce the desired plots
# Load the raw QIAxcel trace for the 2FL plate, label every lane with its
# plasmid, and rescale each lane's time axis between its two markers.
raw_filename <- paste0(data_dir, "Raver/2FL qiaxcel/design2tdptests_20221015_061200_Rw.csv")
df <- read_tsv(raw_filename) %>%
  pivot_longer(cols = contains("RFU")) %>%
  # Characters 6-7 of a column name such as "RFU(A01)" hold the lane number.
  mutate(sample = as.numeric(str_sub(name, 6, 7))) %>%
  # Namespaced, as elsewhere in this file, to avoid a masked `select`.
  dplyr::select(Time, Row, sample, value) %>%
  mutate(plasmid = case_when(
    Row == "A" & sample <= 6 ~ "mScarlet (-ve control)",
    Row == "B" & sample <= 6 ~ "Plasmid B02",
    Row == "C" & sample <= 6 ~ "Plasmid B03",
    Row == "D" & sample <= 6 ~ "Plasmid B04",
    Row == "E" & sample <= 6 ~ "Plasmid B05",
    Row == "F" & sample <= 6 ~ "Plasmid B06",
    Row == "A" & sample > 6 ~ "Plasmid B09",
    Row == "B" & sample > 6 ~ "Plasmid B10",
    Row == "C" & sample > 6 ~ "Plasmid B11",
    Row == "D" & sample > 6 ~ "Plasmid B07",
    Row == "E" & sample > 6 ~ "Plasmid B08",
    Row == "F" & sample < 11 ~ "Plasmid B11 -RT",
    Row == "F" & sample == 11 ~ "Water input",
    Row == "F" & sample == 12 ~ "+ve control")) %>%
  # Fold lanes 1-12 onto sample numbers 1-6 (6 and 12 both map to 6).
  mutate(actual_sample_no = ifelse(sample %% 6 == 0, 6, sample %% 6)) %>%
  group_by(plasmid, actual_sample_no) %>%
  # Everything below is computed per lane (plasmid x sample number).
  mutate(norm_value = value/max(value)) %>%
  arrange(Time) %>%
  mutate(rollmean = zoo::rollmean(norm_value, k=10, na.pad=TRUE)) %>%
  mutate(rollmean2 = ifelse(is.na(rollmean), 0, rollmean)) %>%
  # Define some backgrounds: a few lanes use the 98th percentile as the
  # threshold, all others use the 95th.
  mutate(is_above_background = case_when(plasmid =="Plasmid B02" ~ rollmean2 > quantile(norm_value, 0.98, na.rm=TRUE),
                                         plasmid =="mScarlet (-ve control)" ~ rollmean2 > quantile(norm_value, 0.98, na.rm=TRUE),
                                         plasmid =="Plasmid B07" & actual_sample_no==5 ~ rollmean2 > quantile(norm_value, 0.98, na.rm=TRUE),
                                         plasmid =="Water input" & actual_sample_no==5 ~ rollmean2 > quantile(norm_value, 0.98, na.rm=TRUE),
                                         TRUE ~ rollmean2 > quantile(norm_value, 0.95, na.rm=TRUE))) %>%
  # First and last time point above background define the lane's markers.
  mutate(lower_marker = min(ifelse(is_above_background, Time, 1000000000)),
         upper_marker = max(ifelse(is_above_background, Time, -100000))) %>%
  mutate(normalised_time = round((Time-lower_marker)/(upper_marker-lower_marker),3)) %>%
  filter(normalised_time > 0 & normalised_time < 1) %>%
  #filter(!(plasmid == "Plasmid B10" & actual_sample_no == 6)) %>%
  # Even sample numbers are shTDP, odd ones NT.
  mutate(treatment = ifelse(actual_sample_no %% 2 == 0, "shTDP", "NT"))
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
## dat <- vroom(...)
## problems(dat)
## Rows: 21809 Columns: 18
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (2): PlateId, Row
## dbl (16): Time, RFU(A01), RFU(A02), RFU(A03), RFU(A04), RFU(A05), RFU(A06), ...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Overview: one facet per plasmid, one tile column per sample number, shaded
# by normalised signal.
ggplot(
  df,
  aes(x = actual_sample_no, y = normalised_time, fill = norm_value)
) +
  geom_tile() +
  scale_fill_gradient(low="white", high="black") +
  theme_classic() +
  facet_wrap(~factor(plasmid)) +
  labs(
    title = "10 different TDP-43/Raver1 vectors (with 2FL mutation to block autoregulation)",
    subtitle = "1,3,5=NT, 2,4,6=shTDP",
    x = "",
    y = ""
  ) +
  ggeasy::easy_remove_legend() +
  ylim(0.2,0.9)
## Warning: Removed 26298 rows containing missing values (`geom_tile()`).
# Now, let's plot the 2FL gradient to make it pretty.
# Plasmids listed in display order; their position becomes the construct number.
ordered_names <- data.frame(
  plasmid = paste0("Plasmid B", c("03", "02", "04", "07", "08",
                                  "05", "06", "09", "11", "10")),
  construct_number = factor(1:10)
)
# Inner join drops every lane whose plasmid is not in the ordered list.
df2 <- inner_join(df, ordered_names)
## Joining with `by = join_by(plasmid)`
# Samples 1 and 2 of each construct, one facet row per treatment.
ggplot(
  filter(df2, actual_sample_no <= 2),
  aes(x = construct_number, y = normalised_time, fill = norm_value)
) +
  geom_tile() +
  scale_fill_gradient(low="white", high="black") +
  theme_classic() +
  facet_wrap(~treatment, ncol = 1) +
  labs(x = "Construct number", y = "") +
  ggeasy::easy_remove_legend() +
  ylim(0.25,0.57) +
  theme(
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank()
  )
## Warning: Removed 16472 rows containing missing values (`geom_tile()`).
ggsave("markdown_images/Raver/2FL_gradient.pdf", height = 6, width = 6, units="cm")
## Warning: Removed 16472 rows containing missing values (`geom_tile()`).
To determine whether these constructs resist cryptic splicing, we performed RT-PCRs against UNC13A, STMN2 and AARS1. We ran these on a QIAxcel.
To analyse the QIAxcel data, I wrote a small R package, QIAxcelR.
The samples are loaded in a slightly funky order. The positions are in the “QIAxcel positions.csv” file. Row A refers to the first row of each pair, and Row D refers to the second row of each pair. Column positions and first versus second row is consistent for all pairs.
Annoyingly I lost one replicate of mScarlet untreated. However, this is not a particularly important sample because all of the untreated samples tend to be very similar (mScarlet dox/shTDP is much more important, for example)
First, let’s analyse unc13a in isolation
# Python modules loaded via import().
# NOTE(review): presumably used by the QIAxcel helper functions - confirm.
py <- import("pybaselines")
scipy <- import("scipy")

unc_csv <- paste0(data_dir, "Raver/qiaxcel of unc13a stmn2 aars1/Unc13a 21st June/C220830A16_2023-06-22_2125_20230622_075330_Rw.csv")

# Plate layout in long form: one row per (row, column) with the sample loaded
# there, split into plasmid, treatment and replicate number.
positions <- read_csv(paste0(data_dir, "Raver/qiaxcel of unc13a stmn2 aars1/Unc13a 21st June/QIAxcel positions.csv")) %>%
  pivot_longer(
    cols = !contains("Row"),
    names_to = "column",
    values_to = "sample_name"
  ) %>%
  dplyr::rename(row = `Row/col`) %>%
  mutate(
    # last word of the sample name; NA when it is not a number
    replicate = as.numeric(word(sample_name, -1)),
    plasmid = factor(word(sample_name, 1), levels = c("RV", "B5", "B11", "mScar", "-ve")),
    treatment = ifelse(str_detect(sample_name, "NT"), "NT", "shTDP"),
    column = as.numeric(column)
  )
## Rows: 2 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (13): Row/col, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `replicate = as.numeric(word(sample_name, -1))`.
## Caused by warning:
## ! NAs introduced by coercion
# Read the raw UNC13A QIAxcel export with parse_qiaxcel_output().
df <- parse_qiaxcel_output(unc_csv)
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
## dat <- vroom(...)
## problems(dat)
## Rows: 7270 Columns: 18
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (2): PlateId, Row
## dbl (16): Time, RFU(A01), RFU(A02), RFU(A03), RFU(A04), RFU(A05), RFU(A06), ...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Preprocess the traces, apply a manual correction to lane D12, then attach
# the plate layout.
df2 <- df %>% preprocess_dataframe()
tweak_df <- data.frame(unique_id = "D12", shift = 0, multiply = 0.97)
df3 <- df2 %>% tweak_positions(tweak_df)

# Sample numbering: "-ve" lanes get 6; otherwise replicate r gives 2r - 1 for
# NT and 2r for shTDP.
df4 <- df3 %>%
  mutate(column = as.numeric(column)) %>%
  left_join(positions, by = c("row", "column")) %>%
  mutate(
    actual_sample_no = ifelse(
      plasmid == "-ve",
      6,
      2*replicate - 1 + as.numeric(treatment == "shTDP")
    )
  )
unc_final_data <- df4
# Gel-style view of the UNC13A traces, one facet per plasmid.
# Subtitle corrected: shTDP samples are numbered 2, 4 and 6 (2 * replicate).
p1 <- ggplot(df4,
             aes(y = index_for_plotting, fill = corrected_value, x = actual_sample_no)) +
  geom_tile() +
  theme_classic() +
  scale_fill_gradient(low = "white", high = "black") +
  ylim(0.55,0.75) +
  facet_wrap(~plasmid) +
  ggtitle("Cryptic splicing of UNC13A", "1,3,5 = NT, 2,4,6 = shTDP") +
  xlab("Sample number") +
  ylab("") +
  ggeasy::easy_remove_legend()
# Molar ratio of the two UNC13A bands per lane, joined to the plate layout.
# PSI is 100 * (1 - molar fraction of the lower band); the mean and SD are
# taken within each plasmid x treatment group. Lanes without a replicate
# number are dropped.
yo <- find_molar_ratios(
  df3,
  lower_band_pos = 0.58, lower_band_width = 0.02, lower_band_nts = 405,
  upper_band_pos = 0.715, upper_band_width = 0.03, upper_band_nts = 405+128
) %>%
  left_join(positions %>% mutate(unique_id = paste0(row, column))) %>%
  filter(!is.na(replicate)) %>%
  group_by(plasmid, treatment) %>%
  mutate(
    psi = 100*(1-molar_fraction_lower_band),
    average_psi = mean(psi),
    sd_psi = sd(psi),
    good_names = factor(
      case_when(
        plasmid == "RV" ~ "Constitutive\nRaver",
        plasmid == "B5" ~ "Cryptic\nRaver 6",
        plasmid == "B11" ~ "Cryptic\nRaver 9",
        plasmid == "mScar" ~ "mScarlet\ncontrol"
      ),
      levels = c("mScarlet\ncontrol", "Constitutive\nRaver", "Cryptic\nRaver 6", "Cryptic\nRaver 9")
    )
  )
## Joining with `by = join_by(unique_id)`
# Keep the UNC13A ratios under their own name before `yo` is reused.
unc_ratios <- yo

# Mean PSI per construct and treatment with +/- 1 SD error bars.
# distinct() keeps one row per group (`yo` is grouped by plasmid, treatment).
p2 <- ggplot(yo %>% distinct(average_psi, .keep_all = TRUE),
             aes(x = good_names, y = average_psi, fill = treatment)) +
  geom_bar(stat="identity", position= "dodge", alpha = 1) +
  ylab("Cryptic exon PSI") +
  geom_errorbar(aes(ymin = average_psi-sd_psi,
                    ymax = average_psi+sd_psi),
                position = position_dodge(width = 0.9)) +
  theme_classic() +
  ggtitle("UNC13A cryptic exon PSI (n=3)") +
  ggeasy::easy_add_legend_title("Treatment") +
  xlab("")

# Stack the gel view above the bar chart.
p1 / p2
## Warning: Removed 81167 rows containing missing values (`geom_tile()`).
Let’s also make a quick plot of representative examples
# Representative lanes: sample 3 (NT) and sample 4 (shTDP) of each construct.
ggplot(unc_final_data %>% filter(actual_sample_no %in% c(3, 4)) %>%
         mutate(good_names = factor(case_when(plasmid == "RV" ~ "Constitutive\nRaver",
                                              plasmid == "B5" ~ "Cryptic\nRaver 6",
                                              plasmid == "B11" ~ "Cryptic\nRaver 9",
                                              plasmid == "mScar" ~ "mScarlet\ncontrol"),
                                    levels = c("mScarlet\ncontrol","Constitutive\nRaver","Cryptic\nRaver 6","Cryptic\nRaver 9"))) %>%
         mutate(sample_label = ifelse(actual_sample_no == 3, "NT", "shTDP")),
       aes(y = index_for_plotting, fill = corrected_value, x = sample_label)) +
  geom_tile() +
  theme_classic() +
  scale_fill_gradient(low = "white", high = "black") +
  ylim(0.53,0.75) +
  facet_wrap(~good_names, ncol = 4) +
  # The x-axis title is blank; the earlier, overridden "Sample number" label
  # has been removed.
  xlab("") +
  ylab("") +
  ggeasy::easy_remove_legend() +
  ggeasy::easy_rotate_x_labels(side = "right") +
  theme(axis.title.y=element_blank(),
        axis.text.y=element_blank(),
        axis.ticks.y=element_blank()) +
  ggeasy::easy_remove_y_axis()
## Warning: Removed 26878 rows containing missing values (`geom_tile()`).
ggsave("markdown_images/Raver/unc qiaxcel examples.pdf", height = 5, width = 8, units="cm")
## Warning: Removed 26878 rows containing missing values (`geom_tile()`).
Now let’s analyse STMN2 in isolation
# Raw QIAxcel export for the 26th June run; only rows C and D are kept for
# the STMN2 analysis.
stmn_csv <- paste0(data_dir, "Raver/qiaxcel of unc13a stmn2 aars1/4 targets 26th June/C220830A16_2023-06-27_2127_20230627_072754_Rw.csv")
df <- filter(
  parse_qiaxcel_output(stmn_csv),
  Row %in% c("C", "D")
)
## Rows: 29080 Columns: 18
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (18): Time, RFU(A01), RFU(A02), RFU(A03), RFU(A04), RFU(A05), RFU(A06), ...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Drop missing readings and make the signal numeric before preprocessing.
df2 <- df %>%
  filter(!is.na(value2)) %>%
  mutate(value2 = as.numeric(value2)) %>%
  preprocess_dataframe()

# Manual shift applied to four lanes.
tweak_df <- data.frame(
  unique_id = c("C10", "D10", "D11", "D12"),
  shift = -0.008,
  multiply = 1
)
df3 <- df2 %>% tweak_positions(tweak_df)

# The layout table uses rows A/D; on this plate those samples sit in rows C/D.
df4 <- df3 %>%
  mutate(column = as.numeric(column)) %>%
  left_join(
    positions %>% mutate(row = ifelse(row == "A", "C", "D")),
    by = c("row", "column")
  ) %>%
  #distinct(row, column, plasmid, replicate, treatment) %>%
  mutate(
    actual_sample_no = ifelse(
      plasmid == "-ve",
      6,
      2*replicate - 1 + as.numeric(treatment == "shTDP")
    )
  )
p1 <- ggplot(df4,
aes(y = index_for_plotting, fill = corrected_value, x = actual_sample_no)) +
geom_tile() +
theme_classic() +
scale_fill_gradient(low = "white", high = "black") +
facet_wrap(~plasmid) +
ggtitle("Cryptic splicing of STMN2", "1,3,5 = NT, 2,4,5 = shTDP") +
xlab("Sample number") +
ylab("") +
ggeasy::easy_remove_legend() +
ylim(0.17, 0.27)
p1
## Warning: Removed 84152 rows containing missing values (`geom_tile()`).
# Keep the annotated trace data for later use.
stmn_final_data <- df4

# Quantify the two STMN2 bands (155 nt lower, 188 nt upper), join the plate
# layout (layout row "A" -> plate row "C", anything else -> "D") and compute
# per plasmid/treatment mean and SD of PSI, where
# PSI = 100 * (1 - molar fraction of the lower band).
# The result stays grouped by plasmid and treatment.
yo <- find_molar_ratios(
  df3,
  lower_band_pos = 0.19, lower_band_width = 0.02, lower_band_nts = 155,
  upper_band_pos = 0.23, upper_band_width = 0.02, upper_band_nts = 188
) %>%
  left_join(
    positions %>%
      mutate(
        row = ifelse(row == "A", "C", "D"),
        unique_id = paste0(row, column)
      )
  ) %>%
  filter(!is.na(replicate)) %>%
  ungroup() %>%
  group_by(plasmid, treatment) %>%
  mutate(
    psi = 100 * (1 - molar_fraction_lower_band),
    average_psi = mean(psi),
    sd_psi = sd(psi),
    good_names = factor(
      case_when(
        plasmid == "RV" ~ "Constitutive\nRaver",
        plasmid == "B5" ~ "Cryptic\nRaver 6",
        plasmid == "B11" ~ "Cryptic\nRaver 9",
        plasmid == "mScar" ~ "mScarlet\ncontrol"
      ),
      levels = c(
        "mScarlet\ncontrol", "Constitutive\nRaver",
        "Cryptic\nRaver 6", "Cryptic\nRaver 9"
      )
    )
  )
## Joining with `by = join_by(unique_id)`
# Keep the STMN2 ratios for the combined summary plot further down.
stmn_ratios <- yo

# Mean PSI per plasmid/treatment with +/- 1 SD error bars. `yo` is still
# grouped by plasmid and treatment, so distinct() keeps one row per group.
p2 <- ggplot(
  yo %>% distinct(average_psi, .keep_all = TRUE),
  aes(x = good_names, y = average_psi, fill = treatment)
) +
  geom_col(position = "dodge", alpha = 1) +
  geom_errorbar(
    aes(ymin = average_psi - sd_psi, ymax = average_psi + sd_psi),
    position = position_dodge(width = 0.9)
  ) +
  theme_classic() +
  labs(
    title = "STMN2 cryptic exon PSI (n=3)",
    x = "",
    y = "Cryptic exon PSI"
  ) +
  ggeasy::easy_add_legend_title("Treatment")
p2

# Gel-style image stacked above the bar plot.
p1 / p2
## Warning: Removed 84152 rows containing missing values (`geom_tile()`).
And now AARS1
# QIAxcel export for the AARS1 run (3rd July).
aars_csv <- paste0(
  data_dir,
  "Raver/qiaxcel of unc13a stmn2 aars1/aars1 3rd july/C220830A16_2023-07-04_2131_20230704_073449_Rw.csv"
)
df <- aars_csv %>% parse_qiaxcel_output()
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
## dat <- vroom(...)
## problems(dat)
## Rows: 7269 Columns: 18
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (2): PlateId, Row
## dbl (16): Time, RFU(A01), RFU(A02), RFU(A03), RFU(A04), RFU(A05), RFU(A06), ...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
df3 <- preprocess_dataframe(df)

# Attach the plate layout (used as-is for this run) and number the samples:
# replicate r gives 2r - 1 for NT and 2r for shTDP (1,3,5 = NT;
# 2,4,6 = shTDP); the "-ve" control is placed at 6.
df4 <- df3 %>%
  mutate(column = as.numeric(column)) %>%
  left_join(positions, by = c("row", "column")) %>%
  mutate(
    actual_sample_no = ifelse(
      plasmid == "-ve",
      6,
      2 * replicate - 1 + as.numeric(treatment == "shTDP")
    )
  )
aars_final_data <- df4

# Pseudo-gel image: one tile column per sample, one facet per plasmid.
# The subtitle previously read "2,4,5 = shTDP"; the numbering above yields
# 2,4,6 for shTDP.
p1 <- ggplot(
  df4,
  aes(y = index_for_plotting, fill = corrected_value, x = actual_sample_no)
) +
  geom_tile() +
  theme_classic() +
  scale_fill_gradient(low = "white", high = "black") +
  facet_wrap(~plasmid) +
  ggtitle("Cryptic splicing of AARS1", "1,3,5 = NT, 2,4,6 = shTDP") +
  xlab("Sample number") +
  ylab("") +
  ylim(0.3, 0.7) +
  ggeasy::easy_remove_legend()
p1
## Warning: Removed 73291 rows containing missing values (`geom_tile()`).
# Quantify the two AARS1 bands (245 nt lower, 245 + 87 nt upper), join the
# plate layout and compute per plasmid/treatment mean and SD of PSI, where
# PSI = 100 * (1 - molar fraction of the lower band).
# The result stays grouped by plasmid and treatment.
yo <- find_molar_ratios(
  df3,
  lower_band_pos = 0.35, lower_band_width = 0.05, lower_band_nts = 245,
  upper_band_pos = 0.5, upper_band_width = 0.06, upper_band_nts = 245 + 87
) %>%
  left_join(
    positions %>%
      mutate(unique_id = paste0(row, column))
  ) %>%
  filter(!is.na(replicate)) %>%
  ungroup() %>%
  group_by(plasmid, treatment) %>%
  mutate(
    psi = 100 * (1 - molar_fraction_lower_band),
    average_psi = mean(psi),
    sd_psi = sd(psi),
    good_names = factor(
      case_when(
        plasmid == "RV" ~ "Constitutive\nRaver",
        plasmid == "B5" ~ "Cryptic\nRaver 6",
        plasmid == "B11" ~ "Cryptic\nRaver 9",
        plasmid == "mScar" ~ "mScarlet\ncontrol"
      ),
      levels = c(
        "mScarlet\ncontrol", "Constitutive\nRaver",
        "Cryptic\nRaver 6", "Cryptic\nRaver 9"
      )
    )
  )
## Joining with `by = join_by(unique_id)`
# Keep the AARS1 ratios for the combined summary plot further down.
aars_ratios <- yo

# Mean PSI per plasmid/treatment with +/- 1 SD error bars. `yo` is still
# grouped by plasmid and treatment, so distinct() keeps one row per group.
p2 <- ggplot(
  yo %>% distinct(average_psi, .keep_all = TRUE),
  aes(x = good_names, y = average_psi, fill = treatment)
) +
  geom_col(position = "dodge", alpha = 1) +
  geom_errorbar(
    aes(ymin = average_psi - sd_psi, ymax = average_psi + sd_psi),
    position = position_dodge(width = 0.9)
  ) +
  theme_classic() +
  labs(
    title = "AARS1 cryptic exon PSI (n=3)",
    x = "",
    y = "Cryptic exon PSI"
  ) +
  ggeasy::easy_add_legend_title("Treatment")
p2

# Gel-style image stacked above the bar plot.
p1 / p2
## Warning: Removed 73291 rows containing missing values (`geom_tile()`).
Now let’s make some nice summary plots
We’ll use smaller names so that it fits on the page
# Stack the per-target PSI tables (AARS1, UNC13A, STMN2), tag each with its
# target and add a short construct label for the x axis.
triple_df <- bind_rows(
  aars_ratios %>% mutate(target = "AARS1"),
  unc_ratios %>% mutate(target = "UNC13A"),
  stmn_ratios %>% mutate(target = "STMN2")
) %>%
  ungroup() %>%
  mutate(
    target = factor(target, levels = c("UNC13A", "STMN2", "AARS1")),
    small_name = factor(
      case_when(
        good_names == "mScarlet\ncontrol" ~ "mScarlet",
        good_names == "Constitutive\nRaver" ~ "Constitutive",
        good_names == "Cryptic\nRaver 6" ~ "Cryptic #6",
        good_names == "Cryptic\nRaver 9" ~ "Cryptic #9"
      ),
      levels = c("mScarlet", "Constitutive", "Cryptic #6", "Cryptic #9")
    )
  )

# One bar per target/construct/treatment. The data are de-duplicated on the
# plotted identifiers rather than on the mean itself: `triple_df` is
# ungrouped here, so de-duplicating on `average_psi` would silently drop a
# bar whenever two conditions of the same target share the same mean
# (for example both exactly 0).
ggplot(
  triple_df %>% distinct(target, small_name, treatment, .keep_all = TRUE),
  aes(x = small_name, y = average_psi, fill = treatment)
) +
  geom_bar(stat = "identity", position = "dodge", alpha = 1) +
  ylab("Cryptic exon PSI") +
  geom_errorbar(
    aes(ymin = average_psi - sd_psi, ymax = average_psi + sd_psi),
    position = position_dodge(width = 0.9)
  ) +
  theme_classic() +
  ggeasy::easy_add_legend_title("Treatment") +
  xlab("") +
  facet_wrap(~target) +
  ggeasy::easy_rotate_x_labels(side = "right") +
  ggsci::scale_fill_npg() +
  ggeasy::easy_legend_at("top")
ggsave("markdown_images/Raver/unc aars and stmn barplot.pdf", height = 7, width = 9, units = "cm")
# QIAxcel export for the WT autoregulation run (14th July).
csv <- paste0(
  data_dir,
  "Raver/WT autoregulation/C220830A16_2023-07-14_2133_20230714_064133_Rw.csv"
)
df <- csv %>% parse_qiaxcel_output()
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
## dat <- vroom(...)
## problems(dat)
## Rows: 7269 Columns: 18
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (2): PlateId, Row
## dbl (16): Time, RFU(A01), RFU(A02), RFU(A03), RFU(A04), RFU(A05), RFU(A06), ...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
df3 <- preprocess_dataframe(df)

# Attach the plate layout. Layout row "D" is remapped to plate row "E" and
# every other layout row to "A" so that it lines up with this run.
# Sample numbering: replicate r gives 2r - 1 for NT and 2r for shTDP
# (1,3,5 = NT; 2,4,6 = shTDP); the "-ve" control is placed at 6.
# Only the three Raver constructs are kept.
df4 <- df3 %>%
  mutate(column = as.numeric(column)) %>%
  left_join(
    positions %>%
      mutate(row = ifelse(row == "D", "E", "A")),
    by = c("row", "column")
  ) %>%
  mutate(
    actual_sample_no = ifelse(
      plasmid == "-ve",
      6,
      2 * replicate - 1 + as.numeric(treatment == "shTDP")
    )
  ) %>%
  filter(plasmid %in% c("RV", "B5", "B11"))

# Pseudo-gel image: one tile column per sample, one facet per plasmid.
# The subtitle previously read "2,4,5 = shTDP"; the numbering above yields
# 2,4,6 for shTDP.
p1 <- ggplot(
  df4,
  aes(y = index_for_plotting, fill = corrected_value, x = actual_sample_no)
) +
  geom_tile() +
  theme_classic() +
  scale_fill_gradient(low = "white", high = "black") +
  facet_wrap(~plasmid) +
  ggtitle("Cryptic splicing of TDP-Raver", "1,3,5 = NT, 2,4,6 = shTDP") +
  xlab("Sample number") +
  ylab("") +
  ylim(0.3, 0.57) +
  ggeasy::easy_remove_legend()
p1
## Warning: Removed 58374 rows containing missing values (`geom_tile()`).
# Quantify the two TDP-Raver bands (335 - 91 nt lower, 335 nt upper), join
# the plate layout (layout row "D" -> plate row "E", anything else -> "A")
# and compute per plasmid/treatment mean and SD of PSI, where
# PSI = 100 * (1 - molar fraction of the lower band).
# Only the three Raver constructs are kept; the result stays grouped by
# plasmid and treatment.
yo <- find_molar_ratios(
  df3,
  lower_band_pos = 0.35, lower_band_width = 0.05, lower_band_nts = 335 - 91,
  upper_band_pos = 0.5, upper_band_width = 0.05, upper_band_nts = 335
) %>%
  left_join(
    positions %>%
      mutate(
        row = ifelse(row == "D", "E", "A"),
        unique_id = paste0(row, column)
      )
  ) %>%
  filter(!is.na(replicate)) %>%
  ungroup() %>%
  group_by(plasmid, treatment) %>%
  mutate(
    psi = 100 * (1 - molar_fraction_lower_band),
    average_psi = mean(psi),
    sd_psi = sd(psi),
    good_names = factor(
      case_when(
        plasmid == "RV" ~ "Constitutive\nRaver",
        plasmid == "B5" ~ "Cryptic\nRaver 6",
        plasmid == "B11" ~ "Cryptic\nRaver 9",
        plasmid == "mScar" ~ "mScarlet\ncontrol"
      ),
      levels = c(
        "mScarlet\ncontrol", "Constitutive\nRaver",
        "Cryptic\nRaver 6", "Cryptic\nRaver 9"
      )
    )
  ) %>%
  filter(plasmid %in% c("RV", "B5", "B11"))
## Joining with `by = join_by(unique_id)`
# Mean PSI per plasmid/treatment with +/- 1 SD error bars. `yo` is still
# grouped by plasmid and treatment, so distinct() keeps one row per group.
p2 <- ggplot(
  yo %>% distinct(average_psi, .keep_all = TRUE),
  aes(x = good_names, y = average_psi, fill = treatment)
) +
  geom_col(position = "dodge", alpha = 1) +
  geom_errorbar(
    aes(ymin = average_psi - sd_psi, ymax = average_psi + sd_psi),
    position = position_dodge(width = 0.9)
  ) +
  theme_classic() +
  labs(
    title = "TDP-Raver autoregulation",
    x = "",
    y = "Cryptic exon PSI"
  ) +
  ggeasy::easy_add_legend_title("Treatment") +
  ggsci::scale_fill_npg()
p2

# Gel-style image stacked above the bar plot.
p1 / p2
## Warning: Removed 58374 rows containing missing values (`geom_tile()`).
# No `plot` argument is given, so ggsave() writes the most recently displayed
# plot, which here is the p1 / p2 composite printed above.
ggsave("markdown_images/Raver/new autoregulation plot.pdf", height = 14, width = 14, units="cm")
## Warning: Removed 58374 rows containing missing values (`geom_tile()`).
We put SKNBE2 cells with dox-inducible expression vectors for constitutive or TDP-REG-gated TDP/Raver (or BFP).
# QIAxcel export for the growth competition run (15th July).
gc_csv <- paste0(
  data_dir,
  "Raver/growth competition dream3/C220830A16_2023-07-15_2135_20230715_042136_Rw.csv"
)
df <- gc_csv %>% parse_qiaxcel_output()
## Rows: 3634 Columns: 18
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (2): PlateId, Row
## dbl (16): Time, RFU(A01), RFU(A02), RFU(A03), RFU(A04), RFU(A05), RFU(A06), ...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
df2 <- preprocess_dataframe(df)
#tweak_df <- data.frame(unique_id = "D12", shift = 0, multiply = 0.97)
#df3 <- tweak_positions(df2, tweak_df)

# Only lanes 1-9 are used. `column` is converted explicitly: it is passed
# through as.numeric() again below, so it may be stored as text, and
# comparing text with a number falls back to string comparison.
df3 <- df2 %>%
  filter(as.numeric(column) < 10)

# Plate layout for this experiment: row A, lanes 1-9 =
# dox 0 / 30 / 1000 ng/ml for replicates 1, 2 and 3.
# NOTE: this overwrites the `positions` layout used by the earlier sections.
positions <- data.frame(row = "A", column = 1:9) %>%
  mutate(
    dox = c(0, 30, 1000, 0, 30, 1000, 0, 30, 1000),
    replicate = c(1, 1, 1, 2, 2, 2, 3, 3, 3)
  )

df4 <- df3 %>%
  mutate(column = as.numeric(column)) %>%
  left_join(positions, by = c("row", "column")) %>%
  mutate(actual_sample_no = column)

# Pseudo-gel image, one tile column per lane. The title and subtitle were
# previously copied from the UNC13A shTDP plot and did not describe these
# lanes (they are dox doses, not NT/shTDP samples).
p1 <- ggplot(
  df4,
  aes(y = index_for_plotting, fill = corrected_value, x = actual_sample_no)
) +
  geom_tile() +
  theme_classic() +
  scale_fill_gradient(low = "white", high = "black") +
  ylim(0.45, 0.8) +
  ggtitle(
    "TDP/Raver growth competition assay",
    "Lanes 1-3, 4-6, 7-9 = replicates 1-3; dox 0, 30, 1000 ng/ml within each"
  ) +
  xlab("Sample number") +
  ylab("") +
  ggeasy::easy_remove_legend()
p1
## Warning: Removed 27997 rows containing missing values (`geom_tile()`).
# Quantify the two bands (335 nt lower, 335 + 280 nt upper), join the dox
# layout and compute, per dox dose, the mean and SD of the percentage of
# the lower ("constitutive") band; the cryptic percentage is its complement.
# The result stays grouped by dox.
yo <- find_molar_ratios(
  df3,
  lower_band_pos = 0.5, lower_band_width = 0.03, lower_band_nts = 335,
  upper_band_pos = 0.75, upper_band_width = 0.03, upper_band_nts = 335 + 280
) %>%
  left_join(
    positions %>%
      mutate(unique_id = paste0(row, column))
  ) %>%
  ungroup() %>%
  group_by(dox) %>%
  mutate(
    pc_constitutive = 100 * (molar_fraction_lower_band),
    mean_constitutive = mean(pc_constitutive),
    sd = sd(pc_constitutive),
    pc_cryptic = 100 - pc_constitutive
  )
## Joining with `by = join_by(unique_id)`
# Mean % constitutive per dox dose with +/- 1 SD error bars. `yo` is still
# grouped by dox, so distinct() keeps one row per dose.
p2 <- ggplot(
  yo %>% distinct(mean_constitutive, .keep_all = TRUE),
  aes(x = factor(dox), y = mean_constitutive, fill = factor(dox))
) +
  geom_bar(stat = "identity", position = "dodge", alpha = 1) +
  ylab("% Constitutive") +
  geom_errorbar(
    aes(ymin = mean_constitutive - sd, ymax = mean_constitutive + sd),
    position = position_dodge(width = 0.9)
  ) +
  theme_classic() +
  ggtitle(
    "TDP/Raver growth competition assay",
    "Expression of Constitutive/Cryptic raver is activated by dox"
  ) +
  ggeasy::easy_add_legend_title("Dox\nng/ml") +
  xlab("")
p2

# Long format: one row per lane and construct (pc_constitutive / pc_cryptic).
stacked_df <- yo %>%
  dplyr::select(dox, replicate, pc_constitutive, pc_cryptic) %>%
  pivot_longer(cols = contains("pc_"))

# Stacked composition per dox dose, one facet per replicate. The x label
# previously read "ng/ul"; every other plot of this experiment reports the
# dose in ng/ml, so the unit is made consistent here.
ggplot(
  stacked_df,
  aes(x = factor(dox), y = value, fill = str_replace(name, "pc_c", "C"))
) +
  geom_bar(stat = "identity") +
  facet_wrap(~paste0("Replicate ", replicate)) +
  theme_classic() +
  xlab("Doxycycline (ng/ml)") +
  ylab("% detection") +
  ggeasy::easy_add_legend_title("Construct") +
  ggtitle("Growth competition of dox-activated TDP-43-Raver") +
  ggsci::scale_fill_npg()
ggsave("markdown_images/Raver/growth competition plot.pdf", height = 7, width = 14, units = "cm")
And now let’s analyse the Nanopore data, as this is more informative.
# Count primary alignments per reference sequence in every BAM file.
bams <- Sys.glob(paste0(data_dir, "Raver/growth competition dream3/aligned/*.bam"))

# Per-file results are collected in a list and bound once at the end, rather
# than growing a data frame inside the loop. This also leaves `all_df`
# defined (as an empty table) when no BAM file is found.
bam_counts <- list()
for (bam in bams) {
  # Sample label: the text between "barcodes_" and ".bam" in the file path.
  file <- word(word(bam, 2, sep = "barcodes_"), 1, sep = "\\.bam")
  df <- data.frame(scanBam(bam)) %>%
    filter(flag < 256) %>% # drops secondary (256) and supplementary (2048) alignments
    group_by(rname) %>%
    mutate(n = n()) %>%
    select(n) %>% # `rname` is kept implicitly because it is the grouping variable
    unique() %>%
    mutate(combo = file)
  bam_counts[[bam]] <- df
}
all_df <- bind_rows(bam_counts)
## Adding missing grouping variables: `rname`
## Adding missing grouping variables: `rname`
## Adding missing grouping variables: `rname`
## Adding missing grouping variables: `rname`
## Adding missing grouping variables: `rname`
## Adding missing grouping variables: `rname`
## Adding missing grouping variables: `rname`
## Adding missing grouping variables: `rname`
## Adding missing grouping variables: `rname`
# Annotate each sample with replicate and dox dose, and express read counts
# as a percentage of all reads within the replicate/dose.
# `combo` is extracted from the file name as text, so it is converted to a
# number (`combo_number`) before the numeric comparisons; comparing text
# with numbers would otherwise fall back to string comparison.
# Samples 1-3 = replicate 1, 4-6 = replicate 2, the rest = replicate 3;
# within each replicate the doses are 0, 30 and 1000.
all_df2 <- all_df %>%
  filter(!is.na(rname)) %>%
  mutate(combo_number = as.numeric(combo)) %>%
  mutate(
    replicate = case_when(
      combo_number < 4 ~ 1,
      combo_number < 7 ~ 2,
      TRUE ~ 3
    )
  ) %>%
  mutate(
    dox = factor(
      case_when(
        combo_number %in% c(1, 4, 7) ~ 0,
        combo_number %in% c(2, 5, 8) ~ 30,
        combo_number %in% c(3, 6, 9) ~ 1000
      ),
      levels = c(0, 30, 1000)
    )
  ) %>%
  ungroup() %>%
  group_by(replicate, dox) %>%
  mutate(pc = 100 * n / sum(n)) %>%
  mutate(
    good_replicate = paste0("Replicate ", replicate),
    good_rname = factor(
      case_when(
        rname == "Dream1_rv_constitutive" ~ "Constitutive",
        rname == "Dream1_rv_b5" ~ "Cryptic 6",
        rname == "Dream1_rv_b11" ~ "Cryptic 9",
        rname == "BFP" ~ "BFP"
      ),
      levels = c("Constitutive", "Cryptic 6", "Cryptic 9", "BFP")
    )
  )
# Change in percentage relative to the 0 ng/ml sample of the same replicate
# and construct. When a group has no 0 ng/ml row the baseline is NA, so the
# change is reported as missing; the previous -1 sentinel silently produced
# `pc + 1` in that case.
all_df3 <- all_df2 %>%
  ungroup() %>%
  group_by(good_replicate, good_rname) %>%
  mutate(
    dox0_pc = if (any(dox == 0, na.rm = TRUE)) {
      max(pc[which(dox == 0)])
    } else {
      NA_real_
    }
  ) %>%
  mutate(change_pc = pc - dox0_pc)

# Percentage of reads per construct across dox doses, one facet per replicate.
p1 <- ggplot(all_df2 %>% ungroup(), aes(x = dox, y = pc, colour = good_rname)) +
  geom_line(aes(group = good_rname)) +
  geom_point() +
  facet_wrap(~good_replicate) +
  theme_classic() +
  ylim(0, NA) +
  ggeasy::easy_remove_legend() +
  ylab("% detected") +
  xlab("Doxycycline (ng/ml)") +
  ggsci::scale_colour_npg()

# Change versus the 0 ng/ml baseline for the two non-zero doses.
p2 <- ggplot(all_df3 %>% filter(dox != 0), aes(x = dox, y = change_pc, fill = good_rname)) +
  geom_bar(stat = "identity", position = "dodge", width = 0.7) +
  facet_wrap(~good_replicate) +
  theme_classic() +
  ggeasy::easy_add_legend_title("Construct") +
  ylab("Change in %\nversus 0 ng/ml") +
  xlab("Doxycycline (ng/ml)") +
  ggsci::scale_fill_npg()
p2

# Side-by-side composite; ggsave() then writes this last displayed plot.
p1 | p2
ggsave("markdown_images/Raver/growth competition summary nanopore.pdf",
       height = 6, width = 19, units = "cm")
First let’s look at cell lines
# AARS1 cryptic-exon PSI in cell lines: drop experiments whose name contains
# "FAC", express PSI as a percentage, relabel the two conditions and compute
# the mean and SD per experiment/condition. Hyphens in experiment names
# become line breaks for the axis labels, except directly after "SH".
df <- read_csv("small_data_files/aars1 specificity/cell_line_counts_psi.csv.gz") %>%
  filter(!str_detect(experiment, "FAC")) %>%
  mutate(
    ce_psi = 100 * ce_psi,
    condition = ifelse(condition == "control", "Control", "TDP-43 -ve")
  ) %>%
  group_by(experiment, condition) %>%
  mutate(
    average_ce = mean(ce_psi),
    sd_ce = sd(ce_psi)
  ) %>%
  mutate(
    experiment = str_replace_all(
      str_replace_all(experiment, "-", "\n"),
      "SH\n", "SH-"
    )
  )
## Rows: 62 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): sample_name, experiment, condition
## dbl (4): annotated, novel_acceptor, novel_donor, ce_psi
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# One row per experiment/condition with its mean and SD.
summary <- df %>%
  distinct(experiment, condition, average_ce, sd_ce)

# Bars = mean CE inclusion, error bars = +/- 1 SD, points = individual samples.
# - Error bars and points use the same dodge width as the bars (0.9, the
#   default bar width) so that they sit centred on their bar; they
#   previously used 0.8.
# - The point colour and transparency are constants, so they are set outside
#   aes(). Inside aes() the alpha value was treated as data: it was rescaled
#   by the alpha scale and added a legend entry instead of being applied
#   as 0.1.
cell_line_plot <- ggplot(summary, aes(x = experiment, y = average_ce, fill = condition)) +
  geom_bar(position = "dodge", stat = "identity") +
  theme_classic() +
  geom_errorbar(
    data = summary,
    aes(ymin = average_ce - sd_ce, ymax = average_ce + sd_ce),
    width = 0.2,
    position = position_dodge(width = 0.9)
  ) +
  geom_point(
    data = df,
    aes(x = experiment, y = ce_psi),
    colour = "grey50",
    alpha = 0.1,
    position = position_dodge(width = 0.9)
  ) +
  ggsci::scale_fill_npg() +
  ylab("% AARS1 CE inclusion") +
  xlab("") +
  ggeasy::easy_remove_legend_title() +
  ggtitle("AARS1 CE detection in cell lines")
Now let’s look at patient samples
# AARS1 cryptic-exon PSI in the NYGC samples: PSI is expressed as a
# percentage, spread into one column per junction type and averaged over the
# novel acceptor and novel donor junctions. Only tissues with exactly two
# distinct `tdp_path` values are kept, and samples are flagged by whether
# their disease is ALS-TDP or FTD-TDP. The result stays grouped by tissue.
df <- read_csv("small_data_files/aars1 specificity/aars1_ce_in_nygc.csv.gz") %>%
  dplyr::select(
    individual, sample, psi, tissue_clean,
    disease, type, tdp_path, disease_full
  ) %>%
  mutate(psi = 100 * psi) %>%
  pivot_wider(names_from = type, values_from = "psi") %>%
  mutate(psi = 0.5 * (novel_acceptor + novel_donor)) %>%
  group_by(tissue_clean) %>%
  mutate(can_be_path = n_distinct(tdp_path) == 2) %>%
  filter(can_be_path) %>%
  mutate(
    tdp_disease = ifelse(disease %in% c("ALS-TDP", "FTD-TDP"), "Yes", "No")
  )
## Rows: 3364 Columns: 19
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (14): paste_into_igv_junction, sample, individual, region, tissue, tissu...
## dbl (4): psi, age, onset, n_vitro
## lgl (1): disease_tissue
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Box plot of CE inclusion per disease, one facet per tissue, coloured by
# whether the disease is ALS-TDP/FTD-TDP.
nygc_plot <- ggplot(df, aes(x = disease, y = psi, fill = tdp_disease)) +
  geom_boxplot(outlier.alpha = 0.2) +
  facet_wrap(~tissue_clean) +
  ggpubr::theme_classic2() +
  ggeasy::easy_rotate_x_labels(side = "right") +
  ggeasy::easy_add_legend_title("TDPopathy?") +
  labs(
    title = "NYGC RNA-seq data",
    subtitle = "Filtered for CNS regions where TDP-43 pathology has been reported",
    x = "Disease",
    y = "% AARS1 CE inclusion"
  ) +
  scale_fill_npg()

# Cell-line panel stacked above the patient panel; ggsave() then writes this
# last displayed plot.
cell_line_plot / nygc_plot
ggsave(
  "markdown_images/AARS1 specificity/combined_plot.pdf",
  height = 20, width = 17, units = "cm"
)
# Read the per-sample junction count tables for the triple cryptic Cre
# construct and flag which of the three cryptic exons each read contains.
files <- Sys.glob(paste0(data_dir, "/Nanopore of triple cryptic cre/new_triple_cryptic_cre/junction_counts/*.csv.gz"))

# Ignore positive control as it doesn't have CEs. The pattern is matched
# against the file name only, so that a directory in the path that happens
# to contain "-1" or "r1" cannot exclude every file.
files <- files[!str_detect(basename(files), "-1|r1")]

# Per-file results are collected in a list and bound once after the loop.
# The previous version initialised `cryptic_df` only when processing
# files[1], so an empty first file (skipped via `next`) left it undefined.
cryptic_list <- list()
for (file in files) {
  print(file)

  # File names look like "<bc1>_<bc2>.csv.gz".
  sample_name <- word(basename(file), 1, 1, sep = "\\.")
  bc1 <- word(sample_name, 1, 1, sep = "_")
  bc2 <- word(sample_name, 2, 2, sep = "_")

  csv <- read_csv(file)
  if (nrow(csv) == 0) {
    next
  }

  # Treat a missing flag description as "no flags" so str_detect() below
  # returns FALSE rather than NA.
  csv$flag_string[is.na(csv$flag_string)] <- ""

  csv <- csv %>%
    filter(reference == "triple_cryptic_cre") %>%
    filter(!str_detect(flag_string, "not primary|supplementary")) %>%
    filter(!is.na(junctions)) %>%
    # Only reads containing the 2381-2475 junction are kept.
    filter(str_detect(junctions, "2381-2475")) %>%
    mutate(
      first_cryptic = str_detect(junctions, "235-399;492-721"),
      second_cryptic = str_detect(junctions, "820-984;1083-1312"),
      third_cryptic = str_detect(junctions, "1589-1753;1862-2091")
    ) %>%
    mutate(
      bc1 = bc1,
      bc2 = bc2
    )

  cryptic_list[[file]] <- csv
}
cryptic_df <- bind_rows(cryptic_list)
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data//Nanopore of triple cryptic cre/new_triple_cryptic_cre/junction_counts/f1_r2.csv.gz"
## Rows: 13 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data//Nanopore of triple cryptic cre/new_triple_cryptic_cre/junction_counts/f1_r3.csv.gz"
## Rows: 91 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data//Nanopore of triple cryptic cre/new_triple_cryptic_cre/junction_counts/f1_r4.csv.gz"
## Rows: 5 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data//Nanopore of triple cryptic cre/new_triple_cryptic_cre/junction_counts/f2_r2.csv.gz"
## Rows: 488 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data//Nanopore of triple cryptic cre/new_triple_cryptic_cre/junction_counts/f2_r3.csv.gz"
## Rows: 376 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data//Nanopore of triple cryptic cre/new_triple_cryptic_cre/junction_counts/f2_r4.csv.gz"
## Rows: 115 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data//Nanopore of triple cryptic cre/new_triple_cryptic_cre/junction_counts/f3_r2.csv.gz"
## Rows: 105 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data//Nanopore of triple cryptic cre/new_triple_cryptic_cre/junction_counts/f3_r3.csv.gz"
## Rows: 192 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data//Nanopore of triple cryptic cre/new_triple_cryptic_cre/junction_counts/f3_r4.csv.gz"
## Rows: 71 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data//Nanopore of triple cryptic cre/new_triple_cryptic_cre/junction_counts/f4_r2.csv.gz"
## Rows: 750 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data//Nanopore of triple cryptic cre/new_triple_cryptic_cre/junction_counts/f4_r3.csv.gz"
## Rows: 541 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data//Nanopore of triple cryptic cre/new_triple_cryptic_cre/junction_counts/f4_r4.csv.gz"
## Rows: 105 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data//Nanopore of triple cryptic cre/new_triple_cryptic_cre/junction_counts/f5_r2.csv.gz"
## Rows: 280 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data//Nanopore of triple cryptic cre/new_triple_cryptic_cre/junction_counts/f5_r3.csv.gz"
## Rows: 253 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data//Nanopore of triple cryptic cre/new_triple_cryptic_cre/junction_counts/f5_r4.csv.gz"
## Rows: 136 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data//Nanopore of triple cryptic cre/new_triple_cryptic_cre/junction_counts/f6_r2.csv.gz"
## Rows: 461 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data//Nanopore of triple cryptic cre/new_triple_cryptic_cre/junction_counts/f6_r3.csv.gz"
## Rows: 811 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data//Nanopore of triple cryptic cre/new_triple_cryptic_cre/junction_counts/f6_r4.csv.gz"
## Rows: 225 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data//Nanopore of triple cryptic cre/new_triple_cryptic_cre/junction_counts/f7_r2.csv.gz"
## Rows: 53 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data//Nanopore of triple cryptic cre/new_triple_cryptic_cre/junction_counts/f7_r3.csv.gz"
## Rows: 37 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data//Nanopore of triple cryptic cre/new_triple_cryptic_cre/junction_counts/f7_r4.csv.gz"
## Rows: 146 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data//Nanopore of triple cryptic cre/new_triple_cryptic_cre/junction_counts/f8_r2.csv.gz"
## Rows: 389 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data//Nanopore of triple cryptic cre/new_triple_cryptic_cre/junction_counts/f8_r3.csv.gz"
## Rows: 359 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "/Users/ogw/Library/CloudStorage/GoogleDrive-oscargwilkins@gmail.com/My Drive/UCL PhD/Year 4/Cryptic Gating Paper Figures and relevant code/all_data//Nanopore of triple cryptic cre/new_triple_cryptic_cre/junction_counts/f8_r4.csv.gz"
## Rows: 109 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Annotate every junction-count row with:
#   ce_string - the three cryptic-exon flags joined as e.g. "1_1_1"
#   treatment - "shTDP" for bc1 in f2/f4/f6/f8, otherwise "NT"
#   n_CEs     - 0-3, looked up from bc2 (r1-r4)
df <- cryptic_df %>%
  mutate(ce_string = paste(as.numeric(first_cryptic), as.numeric(second_cryptic),
                           as.numeric(third_cryptic), sep = "_")) %>%
  # TRUE (not the reassignable shorthand T) is the catch-all branch
  mutate(treatment = case_when(bc1 %in% c("f2", "f4", "f6", "f8") ~ "shTDP",
                               TRUE ~ "NT")) %>%
  mutate(n_CEs = case_when(bc2 == "r1" ~ 0,
                           bc2 == "r2" ~ 1,
                           bc2 == "r3" ~ 2,
                           bc2 == "r4" ~ 3))
# Summarise each bc1/bc2 sample: total reads per ce_string pattern, then the
# share of reads carrying the '1_1_1' pattern. Samples with 50 or fewer reads
# in total are discarded.
df2 <- df %>%
  group_by(ce_string, bc1, bc2) %>%
  mutate(n = sum(number_of_reads)) %>%
  ungroup() %>%
  distinct(bc1, bc2, ce_string, treatment, n_CEs, n) %>%
  group_by(bc1, bc2) %>%
  mutate(
    n_productive = sum(ifelse(ce_string == '1_1_1', 1, 0) * n),
    n_total = sum(n),
    pc_productive = 100 * n_productive / n_total
  ) %>%
  distinct(n_CEs, treatment, pc_productive, n_productive, n_total) %>%
  filter(n_total > 50)
# Dot plot of % productive transcripts against the number of CEs in the
# construct, one dot per sample, filled by treatment.
ggplot(df2, aes(x = factor(n_CEs), y = pc_productive, fill = treatment)) +
  geom_dotplot(binaxis = 'y', stackdir = 'center', binwidth = 0.7) +
  theme_minimal() +
  ylab("% productive transcripts") +
  xlab('Number of CEs in construct') +
  ggeasy::easy_add_legend_title('Treatment') +
  # The mapped aesthetic is `fill`, so the NPG palette has to be applied to
  # the fill scale; a colour scale has no effect on this plot.
  ggsci::scale_fill_npg()
ggsave("markdown_images/Cre/new_cre_plot.pdf", height = 6, width = 9, units = 'cm')
Creating a global p-value is challenging, as we cannot assume the distribution is normal and N = 3 is unsuitable for a non-parametric test.
We CAN calculate an accurate p-value within each replicate, showing that B5 and B11 are raised relative to mScarlet.
We can assume normality and use a ratio t-test for summaries across replicates, then correct for multiple testing (n = 2 tests).
# Read in the quantifications from Pete
# The three construct columns are stacked into long format (name/value), and
# each row additionally carries the mean of its replicate x construct group.
unc13a_df <- read_csv(paste0(data_dir, '/synapse_data/unc13a intensity.csv')) %>%
pivot_longer(cols = c('Mscarlet', 'B5', 'B11')) %>%
# Drop empty cells
filter(!is.na(value)) %>%
# `average` is the mean value within each replicate/construct combination
group_by(replicate, name) %>%
mutate(average = mean(value)) %>%
# Fix the capitalisation of the control label, then fix the plotting order
mutate(name = str_replace(name, 'Mscarlet', 'mScarlet')) %>%
mutate(name = factor(name, levels = c('mScarlet', 'B5', 'B11'))) %>%
# The replicate number is taken from the second character of the label
mutate(replicate = as.numeric(str_sub(replicate, 2, 2)))
## Rows: 76 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): replicate
## dbl (3): Mscarlet, B5, B11
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# One row per construct and replicate, holding that group's mean intensity
unc13a_summary_df <- unc13a_df %>% distinct(name, replicate, average) %>% ungroup()
# Wide layout: one row per replicate with one column of means per construct
unc13a_wide_summary <- unc13a_summary_df %>%
pivot_wider(names_from = 'name', values_from = 'average')
# Ratio t-test: one-sided, one-sample t-test on log(B5 / mScarlet) of the
# per-replicate means (H1: log ratio > 0)
print(t.test(log(unc13a_wide_summary$B5/unc13a_wide_summary$mScarlet), alternative = 'greater'))
##
## One Sample t-test
##
## data: log(unc13a_wide_summary$B5/unc13a_wide_summary$mScarlet)
## t = 4.8952, df = 2, p-value = 0.01964
## alternative hypothesis: true mean is greater than 0
## 95 percent confidence interval:
## 0.1990963 Inf
## sample estimates:
## mean of x
## 0.4934265
# Same one-sided, one-sample t-test on the log ratio, for B11 against mScarlet
print(t.test(log(unc13a_wide_summary$B11/unc13a_wide_summary$mScarlet), alternative = 'greater'))
##
## One Sample t-test
##
## data: log(unc13a_wide_summary$B11/unc13a_wide_summary$mScarlet)
## t = 8.22, df = 2, p-value = 0.00724
## alternative hypothesis: true mean is greater than 0
## 95 percent confidence interval:
## 0.3227699 Inf
## sample estimates:
## mean of x
## 0.5005952
# Plot all values across replicates: one dot per measurement, one panel per
# replicate, constructs along the x axis.
p1 <- ggplot(unc13a_df, aes(x = name, y = value, fill = name)) +
  geom_dotplot(binaxis = 'y', binwidth = 4, stackdir = 'center') +
  facet_wrap(~paste0('Replicate ', replicate)) +
  # Start the y axis at zero; the upper limit is left to the data
  ylim(0, NA) +
  theme_classic() +
  ggsci::scale_fill_npg() +
  ggeasy::easy_remove_legend() +
  # Axis label spelling corrected ("fluoresence" -> "fluorescence")
  ylab('UNC13A fluorescence') +
  xlab('Construct')
p1
# Plot summaries: grey bars give the mean of the per-replicate averages for
# each construct; the points are the individual per-replicate averages,
# coloured by replicate.
p2 <- ggplot(unc13a_summary_df, aes(x = name, y = average,
                                    colour = factor(replicate))) +
  geom_bar(data = unc13a_summary_df %>%
             group_by(name) %>%
             mutate(m = mean(average)) %>%
             distinct(name, m),
           aes(x = name, y = m),
           fill = 'grey80', colour = 'grey60', stat = 'identity') +
  # Jitter horizontally only, so the plotted averages are not altered
  geom_point(position = position_jitter(width = 0.1, height = 0)) +
  ylim(0, NA) +
  theme_classic() +
  ggeasy::easy_add_legend_title('Replicate') +
  # Axis label spelling corrected ("fluoresence" -> "fluorescence")
  ylab('UNC13A fluorescence') +
  xlab('Construct')
p2
# Pairwise comparisons between constructs within a single replicate.
#
# replicate_number: value of the `replicate` column of `unc13a_df` to analyse.
# Returns the `$res` table produced by dunnTest() (presumably FSA::dunnTest -
# confirm which package is attached) with a `replicate` column appended.
analyse_replicate <- function(replicate_number) {
  this_df <- unc13a_df %>% filter(replicate == replicate_number)
  # An omnibus kruskal.test() used to be computed here, but its result was
  # never used or returned, so only the pairwise table is produced.
  dunnTest(value ~ name, data = this_df)$res %>%
    mutate(replicate = replicate_number)
}
# Run the per-replicate comparisons for replicates 1 to 3, stack the tables,
# and apply one BH correction across all unadjusted p-values
# (arguably this is too conservative, but all relevant are significant anyway).
combined_df <- 1:3 %>%
  map(analyse_replicate) %>%
  bind_rows() %>%
  mutate(full_padj = p.adjust(P.unadj, method = 'BH'))
# Draw p1 again before saving; the ggsave() calls below name no plot explicitly
p1
ggsave("markdown_images/synapse/unc13a_quant_split_by_replicate.pdf",
       height = 7, width = 14, units = "cm")
ggsave("markdown_images/synapse/unc13a_quant_split_by_replicate.png",
       height = 7, width = 14, units = "cm")
# Simulate one paired experiment and return the p-value of a ratio t-test.
#
# n:        number of pairs drawn.
# multiply: factor applied to both the mean and the sd of the second sample.
# sd, mean: standard deviation and mean of the first (baseline) sample.
#
# Both samples are drawn independently from normal distributions (baseline
# first, scaled sample second). The returned value is the p-value of a
# one-sided, one-sample t-test on log(scaled / baseline).
make_paired_data <- function(n = 3, multiply = 1, sd = 1, mean = 10) {
  baseline <- rnorm(n = n, mean = mean, sd = sd)
  scaled <- rnorm(n = n, mean = mean * multiply, sd = sd * multiply)
  log_ratio <- log(scaled / baseline)
  t.test(log_ratio, alternative = 'greater')$p.value
}
# Check the ratio t-test by simulation: the fraction of simulated experiments
# with p < 0.05 when there is no effect (multiply = 1) and when the second
# sample is scaled by 1.1.
rep <- 100000
null_true <- replicate(rep, make_paired_data(multiply = 1))
null_false <- replicate(rep, make_paired_data(multiply = 1.1))
print(sum(null_true < 0.05) / rep)
## [1] 0.04968
print(sum(null_false < 0.05) / rep)
## [1] 0.20292
# As expected, values of 0.05 and then >0.05
# Synapse frequency data in long format (name/value), with empty cells removed
# and the construct columns relabelled for plotting.
freq_df <- paste0(data_dir, '/synapse_data/synapse frequency data.csv') %>%
  read_csv() %>%
  pivot_longer(cols = c('mScarlet', 'B5', 'B11')) %>%
  filter(!is.na(value)) %>%
  # Look the plot label up by column name
  mutate(name = unname(c(mScarlet = 'mSc', B5 = '#6', B11 = '#9')[name]))
## Rows: 28 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): replicate
## dbl (3): mScarlet, B5, B11
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Dot plot of the frequency measurements, one dot per value, by construct
ggplot(freq_df, aes(x = name, y = value, fill = name)) +
  geom_dotplot(binaxis = 'y', stackdir = 'center', binwidth = 0.15) +
  theme_classic() +
  labs(x = 'Construct',
       y = 'Frequency / Hz',
       title = 'Patch clamping of piggyBac i3 Neurons') +
  ggeasy::easy_remove_legend()
ggsave("~/Downloads/frequency_data.png", height = 10, width = 10, units = 'cm')
# Load the table holding every prediction for these proteins
full_df <- paste0(data_dir, '/running_on_many_sequences/processed.csv.gz') %>%
  read_csv()
## Rows: 248769 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): type, filename, protein
## dbl (3): rel_pos, value, score
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Load the SpliceNouveau commands used for the E. coli set (one row per protein)
commands_ecoli <- paste0(data_dir, '/running_on_many_sequences/uniprotkb_proteome_UP000000625_AND_revi_2024_01_18_commands.fixed.csv.gz') %>%
  read_csv()
## Rows: 1891 Columns: 2
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): command, protein_name
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Names of the proteins in the E. coli command file
ecoli_proteins <- unique(commands_ecoli[["protein_name"]])
# Load the matching command file for the human set
commands_human <- paste0(data_dir, '/running_on_many_sequences/uniprotkb_proteome_UP000005640_AND_revi_2024_01_18_commands.fixed.csv.gz') %>%
  read_csv()
## Rows: 1977 Columns: 2
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): command, protein_name
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Names of the proteins in the human command file
human_proteins <- unique(commands_human[["protein_name"]])
# Keep positions with |rel_pos| < 50 and, for each protein, only the rows that
# carry that protein's highest score - and only if that score exceeds 1.8.
# `type` becomes a factor so the facets appear in a fixed order.
full_df2 <- full_df %>%
  filter(rel_pos > -50 & rel_pos < 50) %>%
  group_by(protein) %>%
  filter(score == max(score), score > 1.8) %>%
  mutate(type = factor(type, levels = c('Constitutive donor',
                                        'Cryptic acceptor',
                                        'Cryptic donor',
                                        'Constitutive acceptor')))
# Number of proteins remaining after filtering
n_distinct(full_df2$protein)
## [1] 197
# Heatmaps of `value` by position relative to the splice site, one facet column
# per site type. Top panel: E. coli proteins; bottom panel: human proteins.
# NOTE(review): the E. coli panel uses factor(score) as its row key while the
# human panel uses the second "_"-separated field of the protein name -
# confirm that two proteins can never share a score.
p1 <- ggplot(full_df2 %>% filter(protein %in% ecoli_proteins),
             aes(x = rel_pos, y = factor(score), fill = value)) +
  geom_tile() +
  facet_grid(cols = vars(type)) +
  scale_fill_viridis_c() +
  #scale_fill_continuous(low = 'white', high = 'black') +
  theme_classic() +
  ggeasy::easy_add_legend_title('SpliceAI\nScore') +
  ggeasy::easy_remove_y_axis() +
  labs(title = 'First 100 E. coli proteins\n(3 failed due to short length)',
       x = 'Position relative to splice site (nucleotides)')
p2 <- ggplot(full_df2 %>% filter(protein %in% human_proteins),
             aes(x = rel_pos, y = word(protein, 2, sep="_"), fill = value)) +
  geom_tile() +
  facet_grid(cols = vars(type)) +
  scale_fill_viridis_c() +
  #scale_fill_continuous(low = 'white', high = 'black') +
  theme_classic() +
  ggeasy::easy_add_legend_title('SpliceAI\nScore') +
  ggeasy::easy_remove_y_axis() +
  labs(title = 'First 100 Human proteins',
       x = 'Position relative to splice site (nucleotides)')
# Stack the two panels vertically and save the combined figure
p1/p2
ggsave('~/Downloads/loads_seqs.png', height = 13, width = 17, units = 'cm')
# Minimum total reads a reference/condition pair needs to be kept
min_reads <- 1
x <- 0.01
# Read in junction counts
nt <- paste0(data_dir, "/SpliceNouveau_optimisation/14h.nt.gz") %>%
  read_csv() %>%
  mutate(condition = 'NT')
## Rows: 14884 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Junction counts for the dox file, labelled as the shTDP condition
dox <- paste0(data_dir, "/SpliceNouveau_optimisation/14h.dox.gz") %>%
  read_csv() %>%
  mutate(condition = 'shTDP')
## Rows: 14631 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Per reference and condition, compute the read-weighted fraction of
# transcripts that are productive and the fraction that use the expected donor.
# Only reads containing the '1020-1114' junction are considered.
# NOTE(review): '334-' is matched as a substring of `junctions`, so a junction
# such as '1334-...' would also count - confirm this cannot occur.
df <- bind_rows(nt, dox) %>%
  group_by(reference, condition) %>%
  filter(sum(number_of_reads) >= min_reads) %>%
  ungroup() %>%
  mutate(RPS_spliced = str_detect(junctions, '1020-1114')) %>%
  filter(RPS_spliced) %>%
  mutate(
    productive = junctions == '334-483;1020-1114',
    opt_level = word(reference, 3, sep = '_'),
    attempt = word(reference, 2, sep = '_')
  ) %>%
  group_by(reference, condition) %>%
  mutate(
    frac_productive = sum(ifelse(productive, number_of_reads, 0)) /
      sum(number_of_reads),
    frac_correct_donor = sum(ifelse(str_detect(junctions, '334-'), number_of_reads, 0)) /
      sum(number_of_reads)
  ) %>%
  # Regroup per junction pattern to total the reads of each pattern
  group_by(reference, condition, junctions) %>%
  mutate(n_this_junctions = sum(number_of_reads))
# Analyse what fraction use the correct donor
# (one row per reference and condition; the result is left grouped by reference)
correct_donor_df <- df %>%
  ungroup() %>%
  distinct(reference, opt_level, condition, frac_correct_donor) %>%
  group_by(reference)
# Box plot of the expected-donor usage (as a percentage) by optimisation level
p1 <- ggplot(correct_donor_df,
             aes(x = opt_level, y = 100*frac_correct_donor, fill = condition)) +
  geom_boxplot() +
  theme_classic() +
  labs(x = 'Optimisation level',
       y = '% usage expected\ndonor splice site') +
  ggsci::scale_fill_npg() +
  ggeasy::easy_add_legend_title('Treatment')
# See if this holds if we exclude Intron Retention from the analysis
# Rows whose only junction is '1020-1114' are dropped before the fractions are
# computed; everything else is computed as for the main junction table.
df_ignore_ir <- bind_rows(nt, dox) %>%
  filter(junctions != '1020-1114') %>% # ignore IR
  mutate(RPS_spliced = str_detect(junctions, '1020-1114')) %>%
  filter(RPS_spliced) %>%
  mutate(productive = junctions == '334-483;1020-1114') %>%
  mutate(opt_level = word(reference, 3, sep='_'),
         attempt = word(reference, 2, sep='_')) %>%
  group_by(reference, condition) %>%
  mutate(frac_productive = sum(ifelse(productive, number_of_reads, 0))/sum(number_of_reads)) %>%
  mutate(frac_correct_donor = sum(ifelse(str_detect(junctions, '334-'), number_of_reads, 0)) / sum(number_of_reads)) %>%
  ungroup() %>%
  group_by(reference, condition, junctions) %>%
  mutate(n_this_junctions = sum(number_of_reads))
# One row per reference and condition
correct_donor_df_ignore_ir <- df_ignore_ir %>%
  ungroup() %>%
  distinct(reference, opt_level, condition, frac_correct_donor)
# The axis is labelled as a percentage, so the fraction is scaled by 100
# (previously the raw 0-1 fraction was plotted under a "%" label).
p2 <- ggplot(correct_donor_df_ignore_ir,
             aes(x = opt_level, y = 100*frac_correct_donor, fill = condition)) +
  geom_boxplot() +
  theme_classic() +
  ylab('% usage expected donor\nsplice site (ignoring IR)') +
  xlab('Optimisation level') +
  ggeasy::easy_add_legend_title('Treatment') +
  ggsci::scale_fill_npg()
# Analyse the fraction of productive transcripts. Does it increase with TDP-43 KD?
# `increase` is frac_productive(shTDP) - frac_productive(NT) per reference.
# When a reference is missing one of the two conditions the difference is
# undefined, so it is set to NA instead of being computed against a -1
# placeholder (which produced differences outside the range -1 to 1).
productive_df <- df %>%
  ungroup() %>%
  distinct(reference, opt_level, condition, frac_productive) %>%
  group_by(reference) %>%
  mutate(increase = if (all(c('shTDP', 'NT') %in% condition)) {
    max(frac_productive[condition == 'shTDP']) -
      max(frac_productive[condition == 'NT'])
  } else {
    NA_real_
  })
# The axis is labelled as a percentage, so the fraction is scaled by 100
# (previously the raw 0-1 fraction was plotted under a "%" label).
p3 <- ggplot(productive_df, aes(x = opt_level, y = 100*frac_productive, fill = condition)) +
  geom_boxplot() +
  theme_classic() +
  ggsci::scale_fill_npg() +
  xlab('Optimisation level') +
  ylab('% Productive transcripts') +
  ggeasy::easy_add_legend_title('Treatment')
# Per optimisation level: percentage of references (among those with a defined
# increase) whose productive fraction rises by more than 0.50.
increased_df <- productive_df %>%
  distinct(reference, opt_level, increase) %>%
  ungroup() %>%
  group_by(opt_level) %>%
  mutate(pc_that_increase = 100*sum(as.numeric(increase > 0.50), na.rm = TRUE) /
           n_distinct(reference[!is.na(increase)])) %>%
  distinct(opt_level, pc_that_increase)
# p4 <- ggplot(increased_df, aes(x = opt_level, y = pc_that_increase, fill = pc_that_increase)) +
# geom_bar(stat='identity') +
# theme_classic() +
# xlab('Optimisation level') +
# ylab('% constructs >50% increase of\nproductive transcripts with TDP-43 KD') +
# ggtitle('Response to TDP-43 KD') +
# scale_fill_viridis_c() +
# ggeasy::easy_remove_legend()
# Violin plot of the per-reference change in productive fraction (x100),
# one violin per optimisation level, all scaled to the same width.
p4 <- ggplot(productive_df %>% distinct(reference, opt_level, increase),
             aes(x = factor(opt_level), y = 100*increase)) +
  geom_violin(scale = 'width', fill = 'grey50') +
  theme_classic() +
  labs(x = 'Optimisation level',
       y = '% increase in productive transcripts',
       title = 'Response to TDP-43 KD')
# Arrange the four panels in a 2 x 2 grid tagged A-D, then save
(p1|p2)/(p3|p4) + plot_annotation(tag_levels = 'A')
ggsave('markdown_images/better_score_better_splice/combined_bsbs_figs.pdf',
       height = 14, width = 17, units = 'cm')
generate_full_image <- function(combined_positions, all_images, n_rows,
                                n_columns=6, dim_image=200, spacing_images=5,
                                spacing_wells=30, background_quantile=0.97,
                                verbose=FALSE){
  # This function makes a plot from incucyte data: every image is shrunk to
  # dim_image x dim_image pixels and pasted into one large montage, with the
  # four images of a well arranged as a 2 x 2 block.
  #
  # Arguments:
  #   combined_positions  data frame with columns filename, well, Plate and
  #                       Order (the montage row of the well); an optional
  #                       `replicate` column gives the image number in the well.
  #   all_images          character vector of image paths; files whose name is
  #                       not in combined_positions$filename are ignored.
  #   n_rows, n_columns   number of well rows / columns in the montage.
  #   dim_image           side length (pixels) of each shrunken image.
  #   spacing_images      gap (pixels) between the images of one well.
  #   spacing_wells       gap (pixels) between neighbouring wells.
  #   background_quantile quantile of the background-subtracted montage that is
  #                       used as the scaling denominator.
  #   verbose             if TRUE, report the placement of every image via
  #                       message() (replaces the old unconditional prints).
  #
  # Returns an EBImage Image holding the transposed, inverted montage
  # (1 - scaled intensity).
  #
  # Currently it only works when you have four images per well (stupidly named "replicates")
  well_size <- 2*dim_image + spacing_images     # pixels covered by one well
  well_pitch <- well_size + spacing_wells       # offset between adjacent wells
  height <- n_rows*well_size + (n_rows-1)*spacing_wells + 1
  width <- n_columns*well_size + (n_columns-1)*spacing_wells + 1
  full_image <- matrix(nrow = height, ncol = width, 0)
  background <- 10000000 # set arbitrarily large initial values
  has_replicate_column <- "replicate" %in% colnames(combined_positions)

  for(image in all_images){
    filename <- word(image, sep="/", start=-1, end = -1)
    if(!filename %in% combined_positions$filename){
      next
    }
    plate <- paste(ifelse(str_detect(image, "control plate"), 2, 1))
    well <- word(filename, sep="_", start=2, end=2)
    plate_column <- as.numeric(str_sub(well, 2, 3))
    #image_column = ifelse(plate_column %% 6 == 0, 6, plate_column %% 6)
    image_column <- plate_column
    image_row <- unique(combined_positions$Order[which(combined_positions$well == well &
                                                         combined_positions$Plate == plate)])
    # A well with no match (length 0) or several different Orders cannot be
    # placed; previously this made `if(is.na(image_row))` fail outright.
    if(length(image_row) != 1){
      warning("No unique Order found for ", filename, "; image skipped",
              call. = FALSE)
      next
    }
    if(is.na(image_row)){
      next
    }
    if(image_row > n_rows || image_column > n_columns){
      stop("Image ", filename, " maps to row ", image_row, ", column ",
           image_column, ", which is outside the ", n_rows, " x ", n_columns,
           " montage", call. = FALSE)
    }

    # Image number within the well (1-4)
    if(has_replicate_column){
      # unique() guards against the same filename appearing on several rows
      tile <- unique(combined_positions$replicate[which(combined_positions$filename == filename)])
    } else {
      tile <- as.numeric(word(filename, sep="_", start=3, end=3))
    }
    # Image numbers 2 and 3 are swapped before placement: tiles 2 and 4 go in
    # the right-hand half of the well, tiles 3 and 4 in the lower half.
    if(tile == 2){
      tile <- 3
    } else if(tile == 3){
      tile <- 2
    }

    image_matrix <- readImage(image)
    smaller <- as.array(EBImage::resize(image_matrix, w=dim_image, h=dim_image))

    well_top_left_x <- (image_column - 1)*well_pitch + 1
    image_top_left_x <- well_top_left_x +
      if(tile %in% c(2,4)) dim_image + spacing_images else 0
    well_top_left_y <- (image_row - 1)*well_pitch + 1
    image_top_left_y <- well_top_left_y +
      if(tile %in% c(3,4)) dim_image + spacing_images else 0
    if(verbose){
      message(filename, ": plate ", plate, ", well ", well, ", tile ", tile,
              " -> x = ", image_top_left_x, ", y = ", image_top_left_y)
    }

    full_image[image_top_left_y:(image_top_left_y+dim_image-1),
               image_top_left_x:(image_top_left_x + dim_image-1)] <- smaller

    # Track the lowest 5% quantile seen in any image as the common background
    this_background <- quantile(smaller, 0.05)
    if(this_background < background){
      background <- this_background
    }
  }
  # Subtract the background, scale by the chosen quantile, then transpose and
  # invert for display
  full_image2 <- full_image - background
  img_invert <- Image(1 - t(full_image2/quantile(full_image2, background_quantile)))
  return(img_invert)
}
# Plate layout for the 12QN repeat experiment, restricted to SK-N-BE2 wells.
# Rows labelled 'SK-N-DZ' are relabelled 'SK-N-BE2' before filtering
# (NOTE(review): presumably a labelling error in plate_info.csv - confirm).
plate_data <- read_csv(paste0(data_dir, "/12qn repeat/plate_info.csv")) %>%
  mutate(cell_type = ifelse(cell_type == 'SK-N-DZ', 'SK-N-BE2', cell_type)) %>%
  filter(cell_type == 'SK-N-BE2') %>%
  # Row letter is the first character; the column number is everything after
  # it, so two-digit columns (10-12) are no longer truncated to "1".
  mutate(row = str_sub(coordinate, 1, 1),
         column = as.numeric(str_sub(coordinate, 2))) %>%
  # Replicate number is the second character of the label; the column is
  # renamed so it cannot clash with the per-well image `replicate`.
  mutate(replicate = as.numeric(str_sub(replicate, 2, 2))) %>%
  dplyr::rename(experimental_replicate = replicate)
## Rows: 96 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): coordinate, transfection, cell_type, replicate
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Build the montage positions for the orange-channel images and assemble them.
red_12qn <- Sys.glob(paste0(data_dir, "/12qn repeat/orange/*"))
#red_12qn <- Sys.glob("~/Downloads/orange/*")
positions <- data.frame(filename = word(red_12qn, -1, sep="/")) %>%
  # Well and image number are the 2nd and 3rd "_"-separated filename fields
  mutate(well = word(filename, 2, sep="_"),
         replicate = as.numeric(word(filename, 3, sep="_"))) %>%
  # The column number is everything after the row letter, so two-digit
  # columns (10-12) are no longer truncated to "1".
  mutate(row = str_sub(well, 1, 1),
         column = as.numeric(str_sub(well, 2))) %>%
  # Order = montage row, A-H mapped to 1-8
  left_join(data.frame(row = c("A", "B", 'C', 'D', 'E', 'F', 'G', 'H'),
                       Order = c(1, 2, 3, 4, 5, 6, 7, 8))) %>%
  mutate(plate_column = column) %>%
  mutate(Plate = 1) %>%
  # Keep only the wells listed in plate_data
  inner_join(plate_data) %>%
  #mutate(plate_column = plate_column - min(plate_column) + 1) %>%
  mutate(well = paste0(row, plate_column))
## Joining with `by = join_by(row)`
## Joining with `by = join_by(row, column)`
im <- generate_full_image(positions, red_12qn, n_rows = 4, background_quantile = 0.995, n_columns = 10)
## [1] "VID501_A5_1_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "A5"
## [1] 5
## [1] 1
## [1] "yo"
## [1] 5
## [1] 1741
## [1] 1
## [1] 1741
## [1] 1
## [1] 1
## [1] "VID501_A5_2_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "A5"
## [1] 5
## [1] 1
## [1] "yo"
## [1] 5
## [1] 1741
## [1] 3
## [1] 1741
## [1] 1
## [1] 206
## [1] "VID501_A5_3_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "A5"
## [1] 5
## [1] 1
## [1] "yo"
## [1] 5
## [1] 1741
## [1] 2
## [1] 1946
## [1] 1
## [1] 1
## [1] "VID501_A5_4_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "A5"
## [1] 5
## [1] 1
## [1] "yo"
## [1] 5
## [1] 1741
## [1] 4
## [1] 1946
## [1] 1
## [1] 206
## [1] "VID501_A6_1_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "A6"
## [1] 6
## [1] 1
## [1] "yo"
## [1] 6
## [1] 2176
## [1] 1
## [1] 2176
## [1] 1
## [1] 1
## [1] "VID501_A6_2_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "A6"
## [1] 6
## [1] 1
## [1] "yo"
## [1] 6
## [1] 2176
## [1] 3
## [1] 2176
## [1] 1
## [1] 206
## [1] "VID501_A6_3_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "A6"
## [1] 6
## [1] 1
## [1] "yo"
## [1] 6
## [1] 2176
## [1] 2
## [1] 2381
## [1] 1
## [1] 1
## [1] "VID501_A6_4_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "A6"
## [1] 6
## [1] 1
## [1] "yo"
## [1] 6
## [1] 2176
## [1] 4
## [1] 2381
## [1] 1
## [1] 206
## [1] "VID501_A7_1_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "A7"
## [1] 7
## [1] 1
## [1] "yo"
## [1] 7
## [1] 2611
## [1] 1
## [1] 2611
## [1] 1
## [1] 1
## [1] "VID501_A7_2_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "A7"
## [1] 7
## [1] 1
## [1] "yo"
## [1] 7
## [1] 2611
## [1] 3
## [1] 2611
## [1] 1
## [1] 206
## [1] "VID501_A7_3_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "A7"
## [1] 7
## [1] 1
## [1] "yo"
## [1] 7
## [1] 2611
## [1] 2
## [1] 2816
## [1] 1
## [1] 1
## [1] "VID501_A7_4_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "A7"
## [1] 7
## [1] 1
## [1] "yo"
## [1] 7
## [1] 2611
## [1] 4
## [1] 2816
## [1] 1
## [1] 206
## [1] "VID501_A8_1_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "A8"
## [1] 8
## [1] 1
## [1] "yo"
## [1] 8
## [1] 3046
## [1] 1
## [1] 3046
## [1] 1
## [1] 1
## [1] "VID501_A8_2_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "A8"
## [1] 8
## [1] 1
## [1] "yo"
## [1] 8
## [1] 3046
## [1] 3
## [1] 3046
## [1] 1
## [1] 206
## [1] "VID501_A8_3_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "A8"
## [1] 8
## [1] 1
## [1] "yo"
## [1] 8
## [1] 3046
## [1] 2
## [1] 3251
## [1] 1
## [1] 1
## [1] "VID501_A8_4_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "A8"
## [1] 8
## [1] 1
## [1] "yo"
## [1] 8
## [1] 3046
## [1] 4
## [1] 3251
## [1] 1
## [1] 206
## [1] "VID501_B5_1_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "B5"
## [1] 5
## [1] 2
## [1] "yo"
## [1] 5
## [1] 1741
## [1] 1
## [1] 1741
## [1] 436
## [1] 436
## [1] "VID501_B5_2_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "B5"
## [1] 5
## [1] 2
## [1] "yo"
## [1] 5
## [1] 1741
## [1] 3
## [1] 1741
## [1] 436
## [1] 641
## [1] "VID501_B5_3_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "B5"
## [1] 5
## [1] 2
## [1] "yo"
## [1] 5
## [1] 1741
## [1] 2
## [1] 1946
## [1] 436
## [1] 436
## [1] "VID501_B5_4_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "B5"
## [1] 5
## [1] 2
## [1] "yo"
## [1] 5
## [1] 1741
## [1] 4
## [1] 1946
## [1] 436
## [1] 641
## [1] "VID501_B6_1_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "B6"
## [1] 6
## [1] 2
## [1] "yo"
## [1] 6
## [1] 2176
## [1] 1
## [1] 2176
## [1] 436
## [1] 436
## [1] "VID501_B6_2_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "B6"
## [1] 6
## [1] 2
## [1] "yo"
## [1] 6
## [1] 2176
## [1] 3
## [1] 2176
## [1] 436
## [1] 641
## [1] "VID501_B6_3_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "B6"
## [1] 6
## [1] 2
## [1] "yo"
## [1] 6
## [1] 2176
## [1] 2
## [1] 2381
## [1] 436
## [1] 436
## [1] "VID501_B6_4_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "B6"
## [1] 6
## [1] 2
## [1] "yo"
## [1] 6
## [1] 2176
## [1] 4
## [1] 2381
## [1] 436
## [1] 641
## [1] "VID501_B7_1_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "B7"
## [1] 7
## [1] 2
## [1] "yo"
## [1] 7
## [1] 2611
## [1] 1
## [1] 2611
## [1] 436
## [1] 436
## [1] "VID501_B7_2_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "B7"
## [1] 7
## [1] 2
## [1] "yo"
## [1] 7
## [1] 2611
## [1] 3
## [1] 2611
## [1] 436
## [1] 641
## [1] "VID501_B7_3_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "B7"
## [1] 7
## [1] 2
## [1] "yo"
## [1] 7
## [1] 2611
## [1] 2
## [1] 2816
## [1] 436
## [1] 436
## [1] "VID501_B7_4_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "B7"
## [1] 7
## [1] 2
## [1] "yo"
## [1] 7
## [1] 2611
## [1] 4
## [1] 2816
## [1] 436
## [1] 641
## [1] "VID501_B8_1_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "B8"
## [1] 8
## [1] 2
## [1] "yo"
## [1] 8
## [1] 3046
## [1] 1
## [1] 3046
## [1] 436
## [1] 436
## [1] "VID501_B8_2_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "B8"
## [1] 8
## [1] 2
## [1] "yo"
## [1] 8
## [1] 3046
## [1] 3
## [1] 3046
## [1] 436
## [1] 641
## [1] "VID501_B8_3_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "B8"
## [1] 8
## [1] 2
## [1] "yo"
## [1] 8
## [1] 3046
## [1] 2
## [1] 3251
## [1] 436
## [1] 436
## [1] "VID501_B8_4_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "B8"
## [1] 8
## [1] 2
## [1] "yo"
## [1] 8
## [1] 3046
## [1] 4
## [1] 3251
## [1] 436
## [1] 641
## [1] "VID501_C5_1_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "C5"
## [1] 5
## [1] 3
## [1] "yo"
## [1] 5
## [1] 1741
## [1] 1
## [1] 1741
## [1] 871
## [1] 871
## [1] "VID501_C5_2_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "C5"
## [1] 5
## [1] 3
## [1] "yo"
## [1] 5
## [1] 1741
## [1] 3
## [1] 1741
## [1] 871
## [1] 1076
## [1] "VID501_C5_3_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "C5"
## [1] 5
## [1] 3
## [1] "yo"
## [1] 5
## [1] 1741
## [1] 2
## [1] 1946
## [1] 871
## [1] 871
## [1] "VID501_C5_4_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "C5"
## [1] 5
## [1] 3
## [1] "yo"
## [1] 5
## [1] 1741
## [1] 4
## [1] 1946
## [1] 871
## [1] 1076
## [1] "VID501_C6_1_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "C6"
## [1] 6
## [1] 3
## [1] "yo"
## [1] 6
## [1] 2176
## [1] 1
## [1] 2176
## [1] 871
## [1] 871
## [1] "VID501_C6_2_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "C6"
## [1] 6
## [1] 3
## [1] "yo"
## [1] 6
## [1] 2176
## [1] 3
## [1] 2176
## [1] 871
## [1] 1076
## [1] "VID501_C6_3_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "C6"
## [1] 6
## [1] 3
## [1] "yo"
## [1] 6
## [1] 2176
## [1] 2
## [1] 2381
## [1] 871
## [1] 871
## [1] "VID501_C6_4_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "C6"
## [1] 6
## [1] 3
## [1] "yo"
## [1] 6
## [1] 2176
## [1] 4
## [1] 2381
## [1] 871
## [1] 1076
## [1] "VID501_C7_1_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "C7"
## [1] 7
## [1] 3
## [1] "yo"
## [1] 7
## [1] 2611
## [1] 1
## [1] 2611
## [1] 871
## [1] 871
## [1] "VID501_C7_2_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "C7"
## [1] 7
## [1] 3
## [1] "yo"
## [1] 7
## [1] 2611
## [1] 3
## [1] 2611
## [1] 871
## [1] 1076
## [1] "VID501_C7_3_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "C7"
## [1] 7
## [1] 3
## [1] "yo"
## [1] 7
## [1] 2611
## [1] 2
## [1] 2816
## [1] 871
## [1] 871
## [1] "VID501_C7_4_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "C7"
## [1] 7
## [1] 3
## [1] "yo"
## [1] 7
## [1] 2611
## [1] 4
## [1] 2816
## [1] 871
## [1] 1076
## [1] "VID501_C8_1_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "C8"
## [1] 8
## [1] 3
## [1] "yo"
## [1] 8
## [1] 3046
## [1] 1
## [1] 3046
## [1] 871
## [1] 871
## [1] "VID501_C8_2_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "C8"
## [1] 8
## [1] 3
## [1] "yo"
## [1] 8
## [1] 3046
## [1] 3
## [1] 3046
## [1] 871
## [1] 1076
## [1] "VID501_C8_3_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "C8"
## [1] 8
## [1] 3
## [1] "yo"
## [1] 8
## [1] 3046
## [1] 2
## [1] 3251
## [1] 871
## [1] 871
## [1] "VID501_C8_4_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "C8"
## [1] 8
## [1] 3
## [1] "yo"
## [1] 8
## [1] 3046
## [1] 4
## [1] 3251
## [1] 871
## [1] 1076
## [1] "VID501_D5_1_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "D5"
## [1] 5
## [1] 4
## [1] "yo"
## [1] 5
## [1] 1741
## [1] 1
## [1] 1741
## [1] 1306
## [1] 1306
## [1] "VID501_D5_2_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "D5"
## [1] 5
## [1] 4
## [1] "yo"
## [1] 5
## [1] 1741
## [1] 3
## [1] 1741
## [1] 1306
## [1] 1511
## [1] "VID501_D5_3_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "D5"
## [1] 5
## [1] 4
## [1] "yo"
## [1] 5
## [1] 1741
## [1] 2
## [1] 1946
## [1] 1306
## [1] 1306
## [1] "VID501_D5_4_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "D5"
## [1] 5
## [1] 4
## [1] "yo"
## [1] 5
## [1] 1741
## [1] 4
## [1] 1946
## [1] 1306
## [1] 1511
## [1] "VID501_D6_1_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "D6"
## [1] 6
## [1] 4
## [1] "yo"
## [1] 6
## [1] 2176
## [1] 1
## [1] 2176
## [1] 1306
## [1] 1306
## [1] "VID501_D6_2_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "D6"
## [1] 6
## [1] 4
## [1] "yo"
## [1] 6
## [1] 2176
## [1] 3
## [1] 2176
## [1] 1306
## [1] 1511
## [1] "VID501_D6_3_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "D6"
## [1] 6
## [1] 4
## [1] "yo"
## [1] 6
## [1] 2176
## [1] 2
## [1] 2381
## [1] 1306
## [1] 1306
## [1] "VID501_D6_4_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "D6"
## [1] 6
## [1] 4
## [1] "yo"
## [1] 6
## [1] 2176
## [1] 4
## [1] 2381
## [1] 1306
## [1] 1511
## [1] "VID501_D7_1_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "D7"
## [1] 7
## [1] 4
## [1] "yo"
## [1] 7
## [1] 2611
## [1] 1
## [1] 2611
## [1] 1306
## [1] 1306
## [1] "VID501_D7_2_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "D7"
## [1] 7
## [1] 4
## [1] "yo"
## [1] 7
## [1] 2611
## [1] 3
## [1] 2611
## [1] 1306
## [1] 1511
## [1] "VID501_D7_3_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "D7"
## [1] 7
## [1] 4
## [1] "yo"
## [1] 7
## [1] 2611
## [1] 2
## [1] 2816
## [1] 1306
## [1] 1306
## [1] "VID501_D7_4_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "D7"
## [1] 7
## [1] 4
## [1] "yo"
## [1] 7
## [1] 2611
## [1] 4
## [1] 2816
## [1] 1306
## [1] 1511
## [1] "VID501_D8_1_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "D8"
## [1] 8
## [1] 4
## [1] "yo"
## [1] 8
## [1] 3046
## [1] 1
## [1] 3046
## [1] 1306
## [1] 1306
## [1] "VID501_D8_2_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "D8"
## [1] 8
## [1] 4
## [1] "yo"
## [1] 8
## [1] 3046
## [1] 3
## [1] 3046
## [1] 1306
## [1] 1511
## [1] "VID501_D8_3_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "D8"
## [1] 8
## [1] 4
## [1] "yo"
## [1] 8
## [1] 3046
## [1] 2
## [1] 3251
## [1] 1306
## [1] 1306
## [1] "VID501_D8_4_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "D8"
## [1] 8
## [1] 4
## [1] "yo"
## [1] 8
## [1] 3046
## [1] 4
## [1] 3251
## [1] 1306
## [1] 1511
# Undo the inversion of the montage, halve the intensity, and place it in the
# red channel only (green and blue are left unset); then show and save it.
rgb_red <- rgbImage(red = 0.5 * (1 - im))
display(rgb_red)
writeImage(rgb_red,
           "markdown_images/12QN repeat/uncropped_mScarlet.png",
           quality = 90)
# Collect the green-channel images of the 12QN repeat experiment.
green_12qn <- Sys.glob(paste0(data_dir, "/12qn repeat/green/*"))
# Build the per-image layout table consumed by generate_full_image().
# File names look like "VID501_A5_1_2023y12m01d_13h16m.png": the second
# "_"-separated field is the well and the third is the image index within
# the well ("replicate").
positions <- data.frame(filename = word(green_12qn, -1, sep = "/")) %>%
  mutate(well = word(filename, 2, sep = "_"),
         replicate = as.numeric(word(filename, 3, sep = "_"))) %>%
  mutate(row = str_sub(well, 1, 1),
         # Read up to two digits after the row letter so that wells in
         # columns 10-12 (e.g. "A10") are not truncated to column 1.
         column = as.numeric(str_sub(well, 2, 3))) %>%
  # Plate row letter -> vertical position (Order) in the montage.
  left_join(data.frame(row = c("A", "B", "C", "D", "E", "F", "G", "H"),
                       Order = c(1, 2, 3, 4, 5, 6, 7, 8))) %>%
  mutate(plate_column = column) %>%
  mutate(Plate = 1) %>%
  # Keep only wells that are described in plate_data.
  inner_join(plate_data) %>%
  #mutate(plate_column = plate_column - min(plate_column) + 1) %>%
  # Rebuild the well label from the parsed row and column.
  mutate(well = paste0(row, plate_column))
## Joining with `by = join_by(row)`
## Joining with `by = join_by(row, column)`
# Assemble the green-channel montage: 4 plate rows, 10 columns, scaled by
# the maximum (quantile 1) of the background-subtracted montage.
im_green <- generate_full_image(
  combined_positions = positions,
  all_images = green_12qn,
  n_rows = 4,
  n_columns = 10,
  background_quantile = 1
)
## [1] "VID501_A5_1_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "A5"
## [1] 5
## [1] 1
## [1] "yo"
## [1] 5
## [1] 1741
## [1] 1
## [1] 1741
## [1] 1
## [1] 1
## [1] "VID501_A5_2_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "A5"
## [1] 5
## [1] 1
## [1] "yo"
## [1] 5
## [1] 1741
## [1] 3
## [1] 1741
## [1] 1
## [1] 206
## [1] "VID501_A5_3_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "A5"
## [1] 5
## [1] 1
## [1] "yo"
## [1] 5
## [1] 1741
## [1] 2
## [1] 1946
## [1] 1
## [1] 1
## [1] "VID501_A5_4_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "A5"
## [1] 5
## [1] 1
## [1] "yo"
## [1] 5
## [1] 1741
## [1] 4
## [1] 1946
## [1] 1
## [1] 206
## [1] "VID501_A6_1_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "A6"
## [1] 6
## [1] 1
## [1] "yo"
## [1] 6
## [1] 2176
## [1] 1
## [1] 2176
## [1] 1
## [1] 1
## [1] "VID501_A6_2_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "A6"
## [1] 6
## [1] 1
## [1] "yo"
## [1] 6
## [1] 2176
## [1] 3
## [1] 2176
## [1] 1
## [1] 206
## [1] "VID501_A6_3_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "A6"
## [1] 6
## [1] 1
## [1] "yo"
## [1] 6
## [1] 2176
## [1] 2
## [1] 2381
## [1] 1
## [1] 1
## [1] "VID501_A6_4_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "A6"
## [1] 6
## [1] 1
## [1] "yo"
## [1] 6
## [1] 2176
## [1] 4
## [1] 2381
## [1] 1
## [1] 206
## [1] "VID501_A7_1_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "A7"
## [1] 7
## [1] 1
## [1] "yo"
## [1] 7
## [1] 2611
## [1] 1
## [1] 2611
## [1] 1
## [1] 1
## [1] "VID501_A7_2_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "A7"
## [1] 7
## [1] 1
## [1] "yo"
## [1] 7
## [1] 2611
## [1] 3
## [1] 2611
## [1] 1
## [1] 206
## [1] "VID501_A7_3_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "A7"
## [1] 7
## [1] 1
## [1] "yo"
## [1] 7
## [1] 2611
## [1] 2
## [1] 2816
## [1] 1
## [1] 1
## [1] "VID501_A7_4_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "A7"
## [1] 7
## [1] 1
## [1] "yo"
## [1] 7
## [1] 2611
## [1] 4
## [1] 2816
## [1] 1
## [1] 206
## [1] "VID501_A8_1_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "A8"
## [1] 8
## [1] 1
## [1] "yo"
## [1] 8
## [1] 3046
## [1] 1
## [1] 3046
## [1] 1
## [1] 1
## [1] "VID501_A8_2_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "A8"
## [1] 8
## [1] 1
## [1] "yo"
## [1] 8
## [1] 3046
## [1] 3
## [1] 3046
## [1] 1
## [1] 206
## [1] "VID501_A8_3_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "A8"
## [1] 8
## [1] 1
## [1] "yo"
## [1] 8
## [1] 3046
## [1] 2
## [1] 3251
## [1] 1
## [1] 1
## [1] "VID501_A8_4_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "A8"
## [1] 8
## [1] 1
## [1] "yo"
## [1] 8
## [1] 3046
## [1] 4
## [1] 3251
## [1] 1
## [1] 206
## [1] "VID501_B5_1_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "B5"
## [1] 5
## [1] 2
## [1] "yo"
## [1] 5
## [1] 1741
## [1] 1
## [1] 1741
## [1] 436
## [1] 436
## [1] "VID501_B5_2_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "B5"
## [1] 5
## [1] 2
## [1] "yo"
## [1] 5
## [1] 1741
## [1] 3
## [1] 1741
## [1] 436
## [1] 641
## [1] "VID501_B5_3_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "B5"
## [1] 5
## [1] 2
## [1] "yo"
## [1] 5
## [1] 1741
## [1] 2
## [1] 1946
## [1] 436
## [1] 436
## [1] "VID501_B5_4_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "B5"
## [1] 5
## [1] 2
## [1] "yo"
## [1] 5
## [1] 1741
## [1] 4
## [1] 1946
## [1] 436
## [1] 641
## [1] "VID501_B6_1_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "B6"
## [1] 6
## [1] 2
## [1] "yo"
## [1] 6
## [1] 2176
## [1] 1
## [1] 2176
## [1] 436
## [1] 436
## [1] "VID501_B6_2_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "B6"
## [1] 6
## [1] 2
## [1] "yo"
## [1] 6
## [1] 2176
## [1] 3
## [1] 2176
## [1] 436
## [1] 641
## [1] "VID501_B6_3_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "B6"
## [1] 6
## [1] 2
## [1] "yo"
## [1] 6
## [1] 2176
## [1] 2
## [1] 2381
## [1] 436
## [1] 436
## [1] "VID501_B6_4_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "B6"
## [1] 6
## [1] 2
## [1] "yo"
## [1] 6
## [1] 2176
## [1] 4
## [1] 2381
## [1] 436
## [1] 641
## [1] "VID501_B7_1_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "B7"
## [1] 7
## [1] 2
## [1] "yo"
## [1] 7
## [1] 2611
## [1] 1
## [1] 2611
## [1] 436
## [1] 436
## [1] "VID501_B7_2_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "B7"
## [1] 7
## [1] 2
## [1] "yo"
## [1] 7
## [1] 2611
## [1] 3
## [1] 2611
## [1] 436
## [1] 641
## [1] "VID501_B7_3_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "B7"
## [1] 7
## [1] 2
## [1] "yo"
## [1] 7
## [1] 2611
## [1] 2
## [1] 2816
## [1] 436
## [1] 436
## [1] "VID501_B7_4_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "B7"
## [1] 7
## [1] 2
## [1] "yo"
## [1] 7
## [1] 2611
## [1] 4
## [1] 2816
## [1] 436
## [1] 641
## [1] "VID501_B8_1_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "B8"
## [1] 8
## [1] 2
## [1] "yo"
## [1] 8
## [1] 3046
## [1] 1
## [1] 3046
## [1] 436
## [1] 436
## [1] "VID501_B8_2_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "B8"
## [1] 8
## [1] 2
## [1] "yo"
## [1] 8
## [1] 3046
## [1] 3
## [1] 3046
## [1] 436
## [1] 641
## [1] "VID501_B8_3_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "B8"
## [1] 8
## [1] 2
## [1] "yo"
## [1] 8
## [1] 3046
## [1] 2
## [1] 3251
## [1] 436
## [1] 436
## [1] "VID501_B8_4_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "B8"
## [1] 8
## [1] 2
## [1] "yo"
## [1] 8
## [1] 3046
## [1] 4
## [1] 3251
## [1] 436
## [1] 641
## [1] "VID501_C5_1_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "C5"
## [1] 5
## [1] 3
## [1] "yo"
## [1] 5
## [1] 1741
## [1] 1
## [1] 1741
## [1] 871
## [1] 871
## [1] "VID501_C5_2_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "C5"
## [1] 5
## [1] 3
## [1] "yo"
## [1] 5
## [1] 1741
## [1] 3
## [1] 1741
## [1] 871
## [1] 1076
## [1] "VID501_C5_3_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "C5"
## [1] 5
## [1] 3
## [1] "yo"
## [1] 5
## [1] 1741
## [1] 2
## [1] 1946
## [1] 871
## [1] 871
## [1] "VID501_C5_4_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "C5"
## [1] 5
## [1] 3
## [1] "yo"
## [1] 5
## [1] 1741
## [1] 4
## [1] 1946
## [1] 871
## [1] 1076
## [1] "VID501_C6_1_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "C6"
## [1] 6
## [1] 3
## [1] "yo"
## [1] 6
## [1] 2176
## [1] 1
## [1] 2176
## [1] 871
## [1] 871
## [1] "VID501_C6_2_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "C6"
## [1] 6
## [1] 3
## [1] "yo"
## [1] 6
## [1] 2176
## [1] 3
## [1] 2176
## [1] 871
## [1] 1076
## [1] "VID501_C6_3_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "C6"
## [1] 6
## [1] 3
## [1] "yo"
## [1] 6
## [1] 2176
## [1] 2
## [1] 2381
## [1] 871
## [1] 871
## [1] "VID501_C6_4_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "C6"
## [1] 6
## [1] 3
## [1] "yo"
## [1] 6
## [1] 2176
## [1] 4
## [1] 2381
## [1] 871
## [1] 1076
## [1] "VID501_C7_1_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "C7"
## [1] 7
## [1] 3
## [1] "yo"
## [1] 7
## [1] 2611
## [1] 1
## [1] 2611
## [1] 871
## [1] 871
## [1] "VID501_C7_2_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "C7"
## [1] 7
## [1] 3
## [1] "yo"
## [1] 7
## [1] 2611
## [1] 3
## [1] 2611
## [1] 871
## [1] 1076
## [1] "VID501_C7_3_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "C7"
## [1] 7
## [1] 3
## [1] "yo"
## [1] 7
## [1] 2611
## [1] 2
## [1] 2816
## [1] 871
## [1] 871
## [1] "VID501_C7_4_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "C7"
## [1] 7
## [1] 3
## [1] "yo"
## [1] 7
## [1] 2611
## [1] 4
## [1] 2816
## [1] 871
## [1] 1076
## [1] "VID501_C8_1_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "C8"
## [1] 8
## [1] 3
## [1] "yo"
## [1] 8
## [1] 3046
## [1] 1
## [1] 3046
## [1] 871
## [1] 871
## [1] "VID501_C8_2_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "C8"
## [1] 8
## [1] 3
## [1] "yo"
## [1] 8
## [1] 3046
## [1] 3
## [1] 3046
## [1] 871
## [1] 1076
## [1] "VID501_C8_3_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "C8"
## [1] 8
## [1] 3
## [1] "yo"
## [1] 8
## [1] 3046
## [1] 2
## [1] 3251
## [1] 871
## [1] 871
## [1] "VID501_C8_4_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "C8"
## [1] 8
## [1] 3
## [1] "yo"
## [1] 8
## [1] 3046
## [1] 4
## [1] 3251
## [1] 871
## [1] 1076
## [1] "VID501_D5_1_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "D5"
## [1] 5
## [1] 4
## [1] "yo"
## [1] 5
## [1] 1741
## [1] 1
## [1] 1741
## [1] 1306
## [1] 1306
## [1] "VID501_D5_2_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "D5"
## [1] 5
## [1] 4
## [1] "yo"
## [1] 5
## [1] 1741
## [1] 3
## [1] 1741
## [1] 1306
## [1] 1511
## [1] "VID501_D5_3_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "D5"
## [1] 5
## [1] 4
## [1] "yo"
## [1] 5
## [1] 1741
## [1] 2
## [1] 1946
## [1] 1306
## [1] 1306
## [1] "VID501_D5_4_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "D5"
## [1] 5
## [1] 4
## [1] "yo"
## [1] 5
## [1] 1741
## [1] 4
## [1] 1946
## [1] 1306
## [1] 1511
## [1] "VID501_D6_1_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "D6"
## [1] 6
## [1] 4
## [1] "yo"
## [1] 6
## [1] 2176
## [1] 1
## [1] 2176
## [1] 1306
## [1] 1306
## [1] "VID501_D6_2_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "D6"
## [1] 6
## [1] 4
## [1] "yo"
## [1] 6
## [1] 2176
## [1] 3
## [1] 2176
## [1] 1306
## [1] 1511
## [1] "VID501_D6_3_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "D6"
## [1] 6
## [1] 4
## [1] "yo"
## [1] 6
## [1] 2176
## [1] 2
## [1] 2381
## [1] 1306
## [1] 1306
## [1] "VID501_D6_4_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "D6"
## [1] 6
## [1] 4
## [1] "yo"
## [1] 6
## [1] 2176
## [1] 4
## [1] 2381
## [1] 1306
## [1] 1511
## [1] "VID501_D7_1_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "D7"
## [1] 7
## [1] 4
## [1] "yo"
## [1] 7
## [1] 2611
## [1] 1
## [1] 2611
## [1] 1306
## [1] 1306
## [1] "VID501_D7_2_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "D7"
## [1] 7
## [1] 4
## [1] "yo"
## [1] 7
## [1] 2611
## [1] 3
## [1] 2611
## [1] 1306
## [1] 1511
## [1] "VID501_D7_3_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "D7"
## [1] 7
## [1] 4
## [1] "yo"
## [1] 7
## [1] 2611
## [1] 2
## [1] 2816
## [1] 1306
## [1] 1306
## [1] "VID501_D7_4_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "D7"
## [1] 7
## [1] 4
## [1] "yo"
## [1] 7
## [1] 2611
## [1] 4
## [1] 2816
## [1] 1306
## [1] 1511
## [1] "VID501_D8_1_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "D8"
## [1] 8
## [1] 4
## [1] "yo"
## [1] 8
## [1] 3046
## [1] 1
## [1] 3046
## [1] 1306
## [1] 1306
## [1] "VID501_D8_2_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "D8"
## [1] 8
## [1] 4
## [1] "yo"
## [1] 8
## [1] 3046
## [1] 3
## [1] 3046
## [1] 1306
## [1] 1511
## [1] "VID501_D8_3_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "D8"
## [1] 8
## [1] 4
## [1] "yo"
## [1] 8
## [1] 3046
## [1] 2
## [1] 3251
## [1] 1306
## [1] 1306
## [1] "VID501_D8_4_2023y12m01d_13h16m.png"
## [1] "1"
## [1] "D8"
## [1] 8
## [1] 4
## [1] "yo"
## [1] 8
## [1] 3046
## [1] 4
## [1] 3251
## [1] 1306
## [1] 1511
# Put (1 - im_green) into the green channel of an RGB image, show it, and
# write it out as the uncropped green figure.
rgb_green <- rgbImage(
  red = NULL,
  green = 1 - im_green,
  blue = NULL
)
display(rgb_green)
writeImage(
  rgb_green,
  "markdown_images/12QN repeat/uncropped_green.png",
  quality = 90
)
And now with HEK cells
generate_full_image <- function(combined_positions, all_images, n_rows,
                                n_columns = 6, dim_image = 200,
                                spacing_images = 5, spacing_wells = 30,
                                background_quantile = 0.97, verbose = TRUE) {
  # Assemble a plate-shaped montage from individual Incucyte images.
  #
  # Each well is drawn as a 2 x 2 tile of images (four images per well,
  # historically called "replicates"); wells are placed on an
  # n_rows x n_columns grid.
  #
  # Arguments:
  #   combined_positions  data frame with one row per image. Columns used:
  #                       filename, well, column (horizontal well position),
  #                       Order (vertical well position), Plate and,
  #                       optionally, replicate (1-4).
  #   all_images          character vector of image paths; images whose file
  #                       name is not listed in combined_positions are skipped.
  #   n_rows, n_columns   number of well rows / columns in the montage.
  #   dim_image           side length (pixels) each image is resized to.
  #   spacing_images      gap (pixels) between the images of one well.
  #   spacing_wells       gap (pixels) between neighbouring wells.
  #   background_quantile quantile of the background-subtracted montage used
  #                       as the scaling denominator.
  #   verbose             if TRUE (default) print the placement coordinates
  #                       of every image, as earlier versions always did.
  #
  # Returns the object built by Image(): the background-subtracted montage
  # divided by its `background_quantile` quantile, transposed and inverted
  # (1 - x).
  tile_size <- 2 * dim_image + spacing_images # side length of one well tile
  well_pitch <- tile_size + spacing_wells     # distance between well origins
  height <- n_rows * tile_size + (n_rows - 1) * spacing_wells + 1
  width <- n_columns * tile_size + (n_columns - 1) * spacing_wells + 1
  full_image <- matrix(0, nrow = height, ncol = width)
  background <- 10000000 # arbitrarily large start value, lowered per image
  has_replicate <- "replicate" %in% colnames(combined_positions)
  n_placed <- 0

  for (image in all_images) {
    this_filename <- word(image, sep = "/", start = -1, end = -1)
    matching_rows <- which(combined_positions$filename == this_filename)
    if (length(matching_rows) == 0) {
      next
    }
    # Use the first matching row only, so a file name duplicated by an
    # upstream join cannot yield vector-valued coordinates.
    this_data <- combined_positions[matching_rows[1], , drop = FALSE]

    # Plate is kept as a character ("1"/"2"), as before; the comparison
    # against combined_positions$Plate relies on R's coercion.
    plate <- if (str_detect(image, "control plate")) "2" else "1"
    if (verbose) print(plate)

    well <- this_data[["well"]]
    image_column <- this_data[["column"]]
    image_row <- unique(combined_positions$Order[
      which(combined_positions$well == well & combined_positions$Plate == plate)
    ])
    if (verbose) print(image_row)
    # No (or an unknown) vertical position for this well: skip the image.
    if (length(image_row) == 0 || is.na(image_row[1])) {
      next
    }
    if (length(image_row) > 1) {
      warning("Well ", well, " on plate ", plate,
              " maps to several Order values; using the first.",
              call. = FALSE)
      image_row <- image_row[1]
    }
    inside_layout <- isTRUE(image_row >= 1 && image_row <= n_rows &&
                              image_column >= 1 && image_column <= n_columns)
    if (!inside_layout) {
      stop("Image '", this_filename, "' maps to row ", image_row,
           ", column ", image_column, ", which is outside the ",
           n_rows, " x ", n_columns, " layout.", call. = FALSE)
    }

    replicate <- if (has_replicate) {
      this_data[["replicate"]]
    } else {
      # Fall back to the third "_"-separated field of the file name.
      as.numeric(word(this_filename, sep = "_", start = 3, end = 3))
    }
    # Swap indices 2 and 3: after the swap, 2/4 go to the right-hand column
    # and 3/4 to the bottom row of the well tile.
    if (replicate == 2) {
      replicate <- 3
    } else if (replicate == 3) {
      replicate <- 2
    }

    image_matrix <- readImage(image)
    smaller <- as.array(EBImage::resize(image_matrix, w = dim_image,
                                        h = dim_image))

    image_offset <- dim_image + spacing_images
    well_top_left_x <- (image_column - 1) * well_pitch + 1
    x_offset <- if (replicate %in% c(2, 4)) image_offset else 0
    image_top_left_x <- well_top_left_x + x_offset
    well_top_left_y <- (image_row - 1) * well_pitch + 1
    y_offset <- if (replicate %in% c(3, 4)) image_offset else 0
    image_top_left_y <- well_top_left_y + y_offset
    if (verbose) {
      print('yo')
      print(image_column)
      print(well_top_left_x)
      print(replicate)
      print(image_top_left_x)
      print(well_top_left_y)
      print(image_top_left_y)
    }

    # NOTE(review): `smaller` is written into a [y, x] matrix as-is and the
    # whole montage is transposed at the end, so each tile may end up
    # transposed relative to the source image - confirm this is intended.
    full_image[image_top_left_y:(image_top_left_y + dim_image - 1),
               image_top_left_x:(image_top_left_x + dim_image - 1)] <- smaller
    n_placed <- n_placed + 1

    # Track the lowest 5% quantile over all images as the common background.
    background <- min(background, quantile(smaller, 0.05, names = FALSE))
  }

  if (n_placed == 0) {
    warning("No image in `all_images` matched `combined_positions`; ",
            "the montage is empty.", call. = FALSE)
  }

  full_image2 <- full_image - background
  Image(1 - t(full_image2 / quantile(full_image2, background_quantile)))
}
# Plate map for the HEK293T experiment: rows A and B, columns 1-12, with the
# plasmid assigned in blocks of three columns.
plate_data <- data.frame(
  column = rep(1:12, times = 2),
  row = rep(c("A", "B"), each = 12)
) %>%
  mutate(
    cell_type = "HEK293T",
    plasmid = case_when(
      column <= 3 ~ "untransfected",
      column <= 6 ~ "12QN",
      column <= 9 ~ "WT",
      column <= 12 ~ "SNAP-only"
    )
  )
# Orange-channel images of the HEK293T 12QN repeat experiment.
# (Alternative local source used earlier: Sys.glob("~/Downloads/orange/*"))
red_12qn <- Sys.glob(
  paste0(
    data_dir,
    "/12qn repeat/hek293T/2024.05.08_12QN Orange 400ms HEK293T in PBS (Incucyte S5B)/orange/*"
  )
)
# Per-image layout table for generate_full_image(): the well and image index
# are the 2nd and 3rd "_"-separated fields of the file name; the well is then
# split into its row letter and (up to two-digit) column number.
positions <- data.frame(
  filename = word(red_12qn, start = -1, sep = "/")
) %>%
  mutate(
    well = word(filename, start = 2, sep = "_"),
    replicate = as.numeric(word(filename, start = 3, sep = "_")),
    row = str_sub(well, start = 1, end = 1),
    column = as.numeric(str_sub(well, start = 2, end = 3))
  ) %>%
  # Row letter -> vertical position (Order) in the montage.
  left_join(data.frame(row = LETTERS[1:8], Order = as.numeric(1:8))) %>%
  mutate(plate_column = column, Plate = 1) %>%
  # Restrict to the wells described in plate_data.
  inner_join(plate_data) %>%
  # Disabled: mutate(plate_column = plate_column - min(plate_column) + 1)
  mutate(well = paste0(row, plate_column))
## Joining with `by = join_by(row)`
## Joining with `by = join_by(row, column)`
# Assemble the orange-channel montage: 2 plate rows, 12 columns, scaled by
# the 99.9% quantile of the background-subtracted montage.
im <- generate_full_image(
  combined_positions = positions,
  all_images = red_12qn,
  n_rows = 2,
  n_columns = 12,
  background_quantile = 0.999
)
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 10
## [1] 3916
## [1] 1
## [1] 3916
## [1] 1
## [1] 1
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 10
## [1] 3916
## [1] 3
## [1] 3916
## [1] 1
## [1] 206
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 10
## [1] 3916
## [1] 2
## [1] 4121
## [1] 1
## [1] 1
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 10
## [1] 3916
## [1] 4
## [1] 4121
## [1] 1
## [1] 206
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 11
## [1] 4351
## [1] 1
## [1] 4351
## [1] 1
## [1] 1
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 11
## [1] 4351
## [1] 3
## [1] 4351
## [1] 1
## [1] 206
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 11
## [1] 4351
## [1] 2
## [1] 4556
## [1] 1
## [1] 1
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 11
## [1] 4351
## [1] 4
## [1] 4556
## [1] 1
## [1] 206
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 12
## [1] 4786
## [1] 1
## [1] 4786
## [1] 1
## [1] 1
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 12
## [1] 4786
## [1] 3
## [1] 4786
## [1] 1
## [1] 206
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 12
## [1] 4786
## [1] 2
## [1] 4991
## [1] 1
## [1] 1
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 12
## [1] 4786
## [1] 4
## [1] 4991
## [1] 1
## [1] 206
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 1
## [1] 1
## [1] 1
## [1] 1
## [1] 1
## [1] 1
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 1
## [1] 1
## [1] 3
## [1] 1
## [1] 1
## [1] 206
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 1
## [1] 1
## [1] 2
## [1] 206
## [1] 1
## [1] 1
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 1
## [1] 1
## [1] 4
## [1] 206
## [1] 1
## [1] 206
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 2
## [1] 436
## [1] 1
## [1] 436
## [1] 1
## [1] 1
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 2
## [1] 436
## [1] 3
## [1] 436
## [1] 1
## [1] 206
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 2
## [1] 436
## [1] 2
## [1] 641
## [1] 1
## [1] 1
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 2
## [1] 436
## [1] 4
## [1] 641
## [1] 1
## [1] 206
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 3
## [1] 871
## [1] 1
## [1] 871
## [1] 1
## [1] 1
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 3
## [1] 871
## [1] 3
## [1] 871
## [1] 1
## [1] 206
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 3
## [1] 871
## [1] 2
## [1] 1076
## [1] 1
## [1] 1
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 3
## [1] 871
## [1] 4
## [1] 1076
## [1] 1
## [1] 206
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 4
## [1] 1306
## [1] 1
## [1] 1306
## [1] 1
## [1] 1
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 4
## [1] 1306
## [1] 3
## [1] 1306
## [1] 1
## [1] 206
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 4
## [1] 1306
## [1] 2
## [1] 1511
## [1] 1
## [1] 1
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 4
## [1] 1306
## [1] 4
## [1] 1511
## [1] 1
## [1] 206
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 5
## [1] 1741
## [1] 1
## [1] 1741
## [1] 1
## [1] 1
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 5
## [1] 1741
## [1] 3
## [1] 1741
## [1] 1
## [1] 206
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 5
## [1] 1741
## [1] 2
## [1] 1946
## [1] 1
## [1] 1
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 5
## [1] 1741
## [1] 4
## [1] 1946
## [1] 1
## [1] 206
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 6
## [1] 2176
## [1] 1
## [1] 2176
## [1] 1
## [1] 1
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 6
## [1] 2176
## [1] 3
## [1] 2176
## [1] 1
## [1] 206
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 6
## [1] 2176
## [1] 2
## [1] 2381
## [1] 1
## [1] 1
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 6
## [1] 2176
## [1] 4
## [1] 2381
## [1] 1
## [1] 206
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 7
## [1] 2611
## [1] 1
## [1] 2611
## [1] 1
## [1] 1
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 7
## [1] 2611
## [1] 3
## [1] 2611
## [1] 1
## [1] 206
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 7
## [1] 2611
## [1] 2
## [1] 2816
## [1] 1
## [1] 1
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 7
## [1] 2611
## [1] 4
## [1] 2816
## [1] 1
## [1] 206
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 8
## [1] 3046
## [1] 1
## [1] 3046
## [1] 1
## [1] 1
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 8
## [1] 3046
## [1] 3
## [1] 3046
## [1] 1
## [1] 206
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 8
## [1] 3046
## [1] 2
## [1] 3251
## [1] 1
## [1] 1
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 8
## [1] 3046
## [1] 4
## [1] 3251
## [1] 1
## [1] 206
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 9
## [1] 3481
## [1] 1
## [1] 3481
## [1] 1
## [1] 1
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 9
## [1] 3481
## [1] 3
## [1] 3481
## [1] 1
## [1] 206
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 9
## [1] 3481
## [1] 2
## [1] 3686
## [1] 1
## [1] 1
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 9
## [1] 3481
## [1] 4
## [1] 3686
## [1] 1
## [1] 206
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 10
## [1] 3916
## [1] 1
## [1] 3916
## [1] 436
## [1] 436
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 10
## [1] 3916
## [1] 3
## [1] 3916
## [1] 436
## [1] 641
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 10
## [1] 3916
## [1] 2
## [1] 4121
## [1] 436
## [1] 436
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 10
## [1] 3916
## [1] 4
## [1] 4121
## [1] 436
## [1] 641
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 11
## [1] 4351
## [1] 1
## [1] 4351
## [1] 436
## [1] 436
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 11
## [1] 4351
## [1] 3
## [1] 4351
## [1] 436
## [1] 641
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 11
## [1] 4351
## [1] 2
## [1] 4556
## [1] 436
## [1] 436
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 11
## [1] 4351
## [1] 4
## [1] 4556
## [1] 436
## [1] 641
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 12
## [1] 4786
## [1] 1
## [1] 4786
## [1] 436
## [1] 436
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 12
## [1] 4786
## [1] 3
## [1] 4786
## [1] 436
## [1] 641
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 12
## [1] 4786
## [1] 2
## [1] 4991
## [1] 436
## [1] 436
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 12
## [1] 4786
## [1] 4
## [1] 4991
## [1] 436
## [1] 641
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 1
## [1] 1
## [1] 1
## [1] 1
## [1] 436
## [1] 436
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 1
## [1] 1
## [1] 3
## [1] 1
## [1] 436
## [1] 641
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 1
## [1] 1
## [1] 2
## [1] 206
## [1] 436
## [1] 436
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 1
## [1] 1
## [1] 4
## [1] 206
## [1] 436
## [1] 641
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 2
## [1] 436
## [1] 1
## [1] 436
## [1] 436
## [1] 436
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 2
## [1] 436
## [1] 3
## [1] 436
## [1] 436
## [1] 641
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 2
## [1] 436
## [1] 2
## [1] 641
## [1] 436
## [1] 436
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 2
## [1] 436
## [1] 4
## [1] 641
## [1] 436
## [1] 641
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 3
## [1] 871
## [1] 1
## [1] 871
## [1] 436
## [1] 436
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 3
## [1] 871
## [1] 3
## [1] 871
## [1] 436
## [1] 641
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 3
## [1] 871
## [1] 2
## [1] 1076
## [1] 436
## [1] 436
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 3
## [1] 871
## [1] 4
## [1] 1076
## [1] 436
## [1] 641
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 4
## [1] 1306
## [1] 1
## [1] 1306
## [1] 436
## [1] 436
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 4
## [1] 1306
## [1] 3
## [1] 1306
## [1] 436
## [1] 641
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 4
## [1] 1306
## [1] 2
## [1] 1511
## [1] 436
## [1] 436
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 4
## [1] 1306
## [1] 4
## [1] 1511
## [1] 436
## [1] 641
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 5
## [1] 1741
## [1] 1
## [1] 1741
## [1] 436
## [1] 436
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 5
## [1] 1741
## [1] 3
## [1] 1741
## [1] 436
## [1] 641
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 5
## [1] 1741
## [1] 2
## [1] 1946
## [1] 436
## [1] 436
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 5
## [1] 1741
## [1] 4
## [1] 1946
## [1] 436
## [1] 641
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 6
## [1] 2176
## [1] 1
## [1] 2176
## [1] 436
## [1] 436
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 6
## [1] 2176
## [1] 3
## [1] 2176
## [1] 436
## [1] 641
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 6
## [1] 2176
## [1] 2
## [1] 2381
## [1] 436
## [1] 436
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 6
## [1] 2176
## [1] 4
## [1] 2381
## [1] 436
## [1] 641
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 7
## [1] 2611
## [1] 1
## [1] 2611
## [1] 436
## [1] 436
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 7
## [1] 2611
## [1] 3
## [1] 2611
## [1] 436
## [1] 641
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 7
## [1] 2611
## [1] 2
## [1] 2816
## [1] 436
## [1] 436
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 7
## [1] 2611
## [1] 4
## [1] 2816
## [1] 436
## [1] 641
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 8
## [1] 3046
## [1] 1
## [1] 3046
## [1] 436
## [1] 436
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 8
## [1] 3046
## [1] 3
## [1] 3046
## [1] 436
## [1] 641
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 8
## [1] 3046
## [1] 2
## [1] 3251
## [1] 436
## [1] 436
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 8
## [1] 3046
## [1] 4
## [1] 3251
## [1] 436
## [1] 641
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 9
## [1] 3481
## [1] 1
## [1] 3481
## [1] 436
## [1] 436
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 9
## [1] 3481
## [1] 3
## [1] 3481
## [1] 436
## [1] 641
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 9
## [1] 3481
## [1] 2
## [1] 3686
## [1] 436
## [1] 436
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 9
## [1] 3481
## [1] 4
## [1] 3686
## [1] 436
## [1] 641
# Put (1 - im), scaled by 0.5, into the red channel of an RGB image, show
# it, and save it as the uncropped HEK293T mScarlet figure.
rgb_red <- rgbImage(
  red = (1 - im) * 0.5,
  green = NULL,
  blue = NULL
)
display(rgb_red)
writeImage(
  rgb_red,
  "markdown_images/12QN repeat/hek293T/uncropped_mScarlet.png",
  quality = 90
)
# Green-channel images of the HEK293T 12QN repeat experiment.
green_12qn <- Sys.glob(
  paste0(
    data_dir,
    "/12qn repeat/hek293T/2024.05.08_12QN Orange 400ms HEK293T in PBS (Incucyte S5B)/green/*"
  )
)
# Per-image layout table for generate_full_image(): the well and image index
# are the 2nd and 3rd "_"-separated fields of the file name; the well is then
# split into its row letter and (up to two-digit) column number.
positions <- data.frame(
  filename = word(green_12qn, start = -1, sep = "/")
) %>%
  mutate(
    well = word(filename, start = 2, sep = "_"),
    replicate = as.numeric(word(filename, start = 3, sep = "_")),
    row = str_sub(well, start = 1, end = 1),
    column = as.numeric(str_sub(well, start = 2, end = 3))
  ) %>%
  # Row letter -> vertical position (Order) in the montage.
  left_join(data.frame(row = LETTERS[1:8], Order = as.numeric(1:8))) %>%
  mutate(plate_column = column, Plate = 1) %>%
  # Restrict to the wells described in plate_data.
  inner_join(plate_data) %>%
  # Disabled: mutate(plate_column = plate_column - min(plate_column) + 1)
  mutate(well = paste0(row, plate_column))
## Joining with `by = join_by(row)`
## Joining with `by = join_by(row, column)`
# Assemble the green-channel montage: 2 plate rows, 12 columns, scaled by
# the 99.9% quantile of the background-subtracted montage.
im_green <- generate_full_image(
  combined_positions = positions,
  all_images = green_12qn,
  n_rows = 2,
  n_columns = 12,
  background_quantile = 0.999
)
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 10
## [1] 3916
## [1] 1
## [1] 3916
## [1] 1
## [1] 1
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 10
## [1] 3916
## [1] 3
## [1] 3916
## [1] 1
## [1] 206
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 10
## [1] 3916
## [1] 2
## [1] 4121
## [1] 1
## [1] 1
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 10
## [1] 3916
## [1] 4
## [1] 4121
## [1] 1
## [1] 206
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 11
## [1] 4351
## [1] 1
## [1] 4351
## [1] 1
## [1] 1
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 11
## [1] 4351
## [1] 3
## [1] 4351
## [1] 1
## [1] 206
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 11
## [1] 4351
## [1] 2
## [1] 4556
## [1] 1
## [1] 1
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 11
## [1] 4351
## [1] 4
## [1] 4556
## [1] 1
## [1] 206
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 12
## [1] 4786
## [1] 1
## [1] 4786
## [1] 1
## [1] 1
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 12
## [1] 4786
## [1] 3
## [1] 4786
## [1] 1
## [1] 206
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 12
## [1] 4786
## [1] 2
## [1] 4991
## [1] 1
## [1] 1
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 12
## [1] 4786
## [1] 4
## [1] 4991
## [1] 1
## [1] 206
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 1
## [1] 1
## [1] 1
## [1] 1
## [1] 1
## [1] 1
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 1
## [1] 1
## [1] 3
## [1] 1
## [1] 1
## [1] 206
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 1
## [1] 1
## [1] 2
## [1] 206
## [1] 1
## [1] 1
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 1
## [1] 1
## [1] 4
## [1] 206
## [1] 1
## [1] 206
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 2
## [1] 436
## [1] 1
## [1] 436
## [1] 1
## [1] 1
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 2
## [1] 436
## [1] 3
## [1] 436
## [1] 1
## [1] 206
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 2
## [1] 436
## [1] 2
## [1] 641
## [1] 1
## [1] 1
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 2
## [1] 436
## [1] 4
## [1] 641
## [1] 1
## [1] 206
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 3
## [1] 871
## [1] 1
## [1] 871
## [1] 1
## [1] 1
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 3
## [1] 871
## [1] 3
## [1] 871
## [1] 1
## [1] 206
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 3
## [1] 871
## [1] 2
## [1] 1076
## [1] 1
## [1] 1
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 3
## [1] 871
## [1] 4
## [1] 1076
## [1] 1
## [1] 206
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 4
## [1] 1306
## [1] 1
## [1] 1306
## [1] 1
## [1] 1
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 4
## [1] 1306
## [1] 3
## [1] 1306
## [1] 1
## [1] 206
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 4
## [1] 1306
## [1] 2
## [1] 1511
## [1] 1
## [1] 1
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 4
## [1] 1306
## [1] 4
## [1] 1511
## [1] 1
## [1] 206
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 5
## [1] 1741
## [1] 1
## [1] 1741
## [1] 1
## [1] 1
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 5
## [1] 1741
## [1] 3
## [1] 1741
## [1] 1
## [1] 206
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 5
## [1] 1741
## [1] 2
## [1] 1946
## [1] 1
## [1] 1
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 5
## [1] 1741
## [1] 4
## [1] 1946
## [1] 1
## [1] 206
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 6
## [1] 2176
## [1] 1
## [1] 2176
## [1] 1
## [1] 1
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 6
## [1] 2176
## [1] 3
## [1] 2176
## [1] 1
## [1] 206
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 6
## [1] 2176
## [1] 2
## [1] 2381
## [1] 1
## [1] 1
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 6
## [1] 2176
## [1] 4
## [1] 2381
## [1] 1
## [1] 206
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 7
## [1] 2611
## [1] 1
## [1] 2611
## [1] 1
## [1] 1
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 7
## [1] 2611
## [1] 3
## [1] 2611
## [1] 1
## [1] 206
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 7
## [1] 2611
## [1] 2
## [1] 2816
## [1] 1
## [1] 1
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 7
## [1] 2611
## [1] 4
## [1] 2816
## [1] 1
## [1] 206
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 8
## [1] 3046
## [1] 1
## [1] 3046
## [1] 1
## [1] 1
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 8
## [1] 3046
## [1] 3
## [1] 3046
## [1] 1
## [1] 206
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 8
## [1] 3046
## [1] 2
## [1] 3251
## [1] 1
## [1] 1
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 8
## [1] 3046
## [1] 4
## [1] 3251
## [1] 1
## [1] 206
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 9
## [1] 3481
## [1] 1
## [1] 3481
## [1] 1
## [1] 1
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 9
## [1] 3481
## [1] 3
## [1] 3481
## [1] 1
## [1] 206
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 9
## [1] 3481
## [1] 2
## [1] 3686
## [1] 1
## [1] 1
## [1] "1"
## [1] 1
## [1] "yo"
## [1] 9
## [1] 3481
## [1] 4
## [1] 3686
## [1] 1
## [1] 206
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 10
## [1] 3916
## [1] 1
## [1] 3916
## [1] 436
## [1] 436
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 10
## [1] 3916
## [1] 3
## [1] 3916
## [1] 436
## [1] 641
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 10
## [1] 3916
## [1] 2
## [1] 4121
## [1] 436
## [1] 436
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 10
## [1] 3916
## [1] 4
## [1] 4121
## [1] 436
## [1] 641
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 11
## [1] 4351
## [1] 1
## [1] 4351
## [1] 436
## [1] 436
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 11
## [1] 4351
## [1] 3
## [1] 4351
## [1] 436
## [1] 641
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 11
## [1] 4351
## [1] 2
## [1] 4556
## [1] 436
## [1] 436
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 11
## [1] 4351
## [1] 4
## [1] 4556
## [1] 436
## [1] 641
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 12
## [1] 4786
## [1] 1
## [1] 4786
## [1] 436
## [1] 436
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 12
## [1] 4786
## [1] 3
## [1] 4786
## [1] 436
## [1] 641
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 12
## [1] 4786
## [1] 2
## [1] 4991
## [1] 436
## [1] 436
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 12
## [1] 4786
## [1] 4
## [1] 4991
## [1] 436
## [1] 641
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 1
## [1] 1
## [1] 1
## [1] 1
## [1] 436
## [1] 436
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 1
## [1] 1
## [1] 3
## [1] 1
## [1] 436
## [1] 641
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 1
## [1] 1
## [1] 2
## [1] 206
## [1] 436
## [1] 436
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 1
## [1] 1
## [1] 4
## [1] 206
## [1] 436
## [1] 641
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 2
## [1] 436
## [1] 1
## [1] 436
## [1] 436
## [1] 436
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 2
## [1] 436
## [1] 3
## [1] 436
## [1] 436
## [1] 641
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 2
## [1] 436
## [1] 2
## [1] 641
## [1] 436
## [1] 436
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 2
## [1] 436
## [1] 4
## [1] 641
## [1] 436
## [1] 641
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 3
## [1] 871
## [1] 1
## [1] 871
## [1] 436
## [1] 436
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 3
## [1] 871
## [1] 3
## [1] 871
## [1] 436
## [1] 641
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 3
## [1] 871
## [1] 2
## [1] 1076
## [1] 436
## [1] 436
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 3
## [1] 871
## [1] 4
## [1] 1076
## [1] 436
## [1] 641
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 4
## [1] 1306
## [1] 1
## [1] 1306
## [1] 436
## [1] 436
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 4
## [1] 1306
## [1] 3
## [1] 1306
## [1] 436
## [1] 641
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 4
## [1] 1306
## [1] 2
## [1] 1511
## [1] 436
## [1] 436
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 4
## [1] 1306
## [1] 4
## [1] 1511
## [1] 436
## [1] 641
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 5
## [1] 1741
## [1] 1
## [1] 1741
## [1] 436
## [1] 436
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 5
## [1] 1741
## [1] 3
## [1] 1741
## [1] 436
## [1] 641
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 5
## [1] 1741
## [1] 2
## [1] 1946
## [1] 436
## [1] 436
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 5
## [1] 1741
## [1] 4
## [1] 1946
## [1] 436
## [1] 641
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 6
## [1] 2176
## [1] 1
## [1] 2176
## [1] 436
## [1] 436
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 6
## [1] 2176
## [1] 3
## [1] 2176
## [1] 436
## [1] 641
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 6
## [1] 2176
## [1] 2
## [1] 2381
## [1] 436
## [1] 436
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 6
## [1] 2176
## [1] 4
## [1] 2381
## [1] 436
## [1] 641
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 7
## [1] 2611
## [1] 1
## [1] 2611
## [1] 436
## [1] 436
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 7
## [1] 2611
## [1] 3
## [1] 2611
## [1] 436
## [1] 641
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 7
## [1] 2611
## [1] 2
## [1] 2816
## [1] 436
## [1] 436
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 7
## [1] 2611
## [1] 4
## [1] 2816
## [1] 436
## [1] 641
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 8
## [1] 3046
## [1] 1
## [1] 3046
## [1] 436
## [1] 436
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 8
## [1] 3046
## [1] 3
## [1] 3046
## [1] 436
## [1] 641
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 8
## [1] 3046
## [1] 2
## [1] 3251
## [1] 436
## [1] 436
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 8
## [1] 3046
## [1] 4
## [1] 3251
## [1] 436
## [1] 641
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 9
## [1] 3481
## [1] 1
## [1] 3481
## [1] 436
## [1] 436
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 9
## [1] 3481
## [1] 3
## [1] 3481
## [1] 436
## [1] 641
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 9
## [1] 3481
## [1] 2
## [1] 3686
## [1] 436
## [1] 436
## [1] "1"
## [1] 2
## [1] "yo"
## [1] 9
## [1] 3481
## [1] 4
## [1] 3686
## [1] 436
## [1] 641
# Build an RGB image whose green channel is the inverse (1 - value) of
# im_green; the red and blue channels keep their NULL defaults.
inverted_green <- 1 - im_green
rgb_green <- rgbImage(green = inverted_green)
display(rgb_green)
writeImage(
  x = rgb_green,
  files = "markdown_images/12QN repeat/hek293T/uncropped_green.png",
  quality = 90
)
# Collect the 4x PNGs and work out where each one belongs in the montage.
image_files <- Sys.glob(paste0(data_dir, "/sami/4x/*.png"))

# Montage geometry, in pixels.
mini_spacing <- 5
big_spacing <- 15
dim_image <- 200

image_df <- data.frame(
  filename = word(image_files, -1, sep = "/"),
  full_dir = image_files
) %>%
  # The well ID is the 2nd "_"-separated field of the file name and the
  # per-well image number is the 3rd.
  mutate(
    well = word(filename, 2, sep = "_"),
    row = str_sub(well, 1, 1),
    # A..H -> 1..8; anything else becomes NA.
    row_number = as.numeric(match(row, LETTERS[1:8])),
    column = as.numeric(str_sub(well, 2, -1)),
    image_number = as.numeric(word(filename, 3, sep = "_"))
  ) %>%
  filter(column <= 6) %>% # just b5
  # Columns 4-6 are stacked underneath columns 1-3 (rows 9-16 of the montage);
  # the images of a well form a 2-wide sub-grid (images 1-2 on top, 3+ below).
  mutate(
    big_row = ifelse(column <= 3, row_number, row_number + 8),
    small_row = ifelse(image_number > 2, 2, 1),
    big_column = ifelse(column <= 3, column, column - 3),
    small_column = ifelse(image_number > 2, image_number - 2, image_number)
  ) %>%
  # Count the gaps and tiles that precede this image along each axis, then
  # convert the counts into 1-based pixel offsets.
  mutate(
    n_mini_spaces_x = big_column - 1 + small_column - 1,
    n_big_spaces_x = big_column - 1,
    n_mini_spaces_y = big_row - 1 + small_row - 1,
    n_big_spaces_y = big_row - 1,
    n_previous_images_x = 2 * (big_column - 1) + small_column - 1,
    n_previous_images_y = 2 * (big_row - 1) + small_row - 1,
    start_x = 1 + n_big_spaces_x * big_spacing +
      n_mini_spaces_x * mini_spacing + n_previous_images_x * dim_image,
    start_y = 1 + n_big_spaces_y * big_spacing +
      n_mini_spaces_y * mini_spacing + n_previous_images_y * dim_image
  ) %>%
  # Plate map: one shRNA per (row, half-plate) combination.
  mutate(shRNA = case_when(
    row == "A" & column %in% 1:3 ~ 'FUS shRNA1',
    row == "B" & column %in% 1:3 ~ 'FUS shRNA2',
    row == "C" & column %in% 1:3 ~ 'FUS shRNA3',
    row == "D" & column %in% 1:3 ~ 'hnRNPA1 shRNA1',
    row == "E" & column %in% 1:3 ~ 'hnRNPA1 shRNA2',
    row == "F" & column %in% 1:3 ~ 'hnRNPA1 shRNA3',
    row == "G" & column %in% 1:3 ~ 'hnRNPC shRNA1',
    row == "H" & column %in% 1:3 ~ 'hnRNPC shRNA2',
    row == "A" & column %in% 4:6 ~ 'hnRNPC shRNA3',
    row == "B" & column %in% 4:6 ~ 'hnRNPK shRNA1',
    row == "C" & column %in% 4:6 ~ 'hnRNPK shRNA2',
    row == "D" & column %in% 4:6 ~ 'hnRNPK shRNA3',
    row == "E" & column %in% 4:6 ~ 'TDP-43 shRNA1',
    row == "F" & column %in% 4:6 ~ 'TDP-43 shRNA2',
    row == "G" & column %in% 4:6 ~ 'TDP-43 shRNA3',
    row == "H" & column %in% 4:6 ~ 'control'
  ))
# Find common normalisation factor
# 100 randomly sampled pixels per image are pooled across every image; the
# maximum of the pool (quantile 1) is used as the shared normalisation factor.
all_vals <- unlist(lapply(image_df$full_dir, function(path) {
  sample(as.vector(readImage(path)), 100)
}))
normalisation <- quantile(all_vals, 1)

# Find background
# Same sampling, restricted to the row "A" wells; the minimum of that pool
# (quantile 0) is the background level subtracted from the montage.
background_df <- image_df %>% filter(row == "A")
all_vals2 <- unlist(lapply(background_df$full_dir, function(path) {
  sample(as.vector(readImage(path)), 100)
}))
if (length(all_vals2) == 0) {
  # No row-A images available: fall back to the pooled sample from all wells.
  warning("No row 'A' images found; estimating background from all images",
          call. = FALSE)
  all_vals2 <- all_vals
}
# FIX: this previously read `quantile(all_vals, 0.0)`, so the row-A sample
# (all_vals2) was computed and then silently ignored.
background <- quantile(all_vals2, 0.0)
# Assemble the montage: every image is resized to a square tile and pasted
# into a zero-filled canvas at its precomputed (start_x, start_y) offset.
# NOTE(review): tiles are 2 * dim_image wide while start_x / start_y advance by
# dim_image per image, so neighbouring tiles appear to overlap (later tiles
# overwrite earlier ones) -- confirm this is intended.
tile_size <- dim_image * 2
height <- max(image_df$start_y) + tile_size
width <- max(image_df$start_x) + tile_size
full_image <- matrix(0, nrow = height, ncol = width)
for (idx in seq_len(nrow(image_df))) {
  tile <- as.array(
    EBImage::resize(readImage(image_df$full_dir[idx]), w = tile_size, h = tile_size)
  )
  row_span <- seq(image_df$start_y[idx], length.out = tile_size)
  col_span <- seq(image_df$start_x[idx], length.out = tile_size)
  full_image[row_span, col_span] <- tile
}
# Subtract the background, scale by the common factor, transpose and save.
rgbimg <- rgbImage(t((full_image - background) / normalisation))
writeImage(rgbimg, "markdown_images/sami/4x_red.png", quality = 90)
# Mean pixel intensity of each (un-resized) image, one value per row of image_df.
image_df$mean_intensity <- vapply(
  image_df$full_dir,
  function(path) mean(readImage(path)),
  numeric(1),
  USE.NAMES = FALSE
)
# Per-image mean intensity for each shRNA; points are coloured by whether the
# shRNA label contains 'TDP'. The same plot is written as PNG and as PDF.
sami_quant_plot <- ggplot(
  image_df,
  aes(x = shRNA, y = mean_intensity, colour = str_detect(shRNA, 'TDP'))
) +
  geom_point(alpha = 0.7) +
  theme_classic() +
  ggeasy::easy_rotate_x_labels(side = 'right') +
  ggsci::scale_colour_npg() +
  ggeasy::easy_remove_legend() +
  ylab('Mean intensity/(arbitrary units)') +
  ylim(0, NA)
sami_quant_plot
for (out_path in c("markdown_images/sami/sami_quants.png",
                   "markdown_images/sami/sami_quants.pdf")) {
  ggsave(out_path, plot = sami_quant_plot, height = 10, width = 12, units = 'cm')
}
# Load the QIAxcel raw export and coerce `value2` to numeric; entries that are
# not numbers become NA, which is what triggers the coercion warning.
qiaxcel_path <- paste0(data_dir, "/i3 neurons/puja cDNA/C220830A16_2024-05-17_2203_20240517_053603_Rw.csv")
df <- qiaxcel_path %>%
  parse_qiaxcel_output() %>%
  mutate(value2 = as.numeric(value2))
## Rows: 10905 Columns: 18
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (18): Time, RFU(A01), RFU(A02), RFU(A03), RFU(A04), RFU(A05), RFU(A06), ...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `value2 = as.numeric(value2)`.
## Caused by warning:
## ! NAs introduced by coercion
df2 <- preprocess_dataframe(df)

# Lookup table from (Row, sample) to a readable sample name. Only
# single-character rows and samples below 8 are kept; `unique_id` is the key
# used to join back onto df2.
id_df <- df %>%
  distinct(Row, sample) %>%
  filter(str_length(Row) == 1, sample < 8) %>%
  mutate(
    sample_name = case_when(
      sample == 1 ~ "WT -protac",
      sample == 2 ~ "Halo +protac",
      sample == 3 ~ "B5 +protac",
      sample == 4 ~ "B11 +protac",
      sample == 5 ~ "mScarlet +protac",
      sample == 6 ~ "WT +protac",
      sample == 7 ~ "Halo -protac"
    ),
    unique_id = paste0(Row, sample)
  )
# Gel-like view of the traces: one column of tiles per sample, shaded by
# corrected signal, restricted to the Halo / mScarlet / B5 / B11 samples and
# faceted by `row`.
tile_plot_data <- df2 %>%
  inner_join(id_df) %>%
  #filter(row == 'A') %>%
  filter(sample_name %in% c('Halo -protac', 'Halo +protac', 'mScarlet +protac', 'B5 +protac', 'B11 +protac'))
ggplot(
  tile_plot_data,
  aes(
    x = factor(sample_name, levels = c('Halo -protac', 'Halo +protac', 'mScarlet +protac', 'B5 +protac', 'B11 +protac')),
    y = index_for_plotting,
    fill = corrected_value
  )
) +
  geom_tile() +
  theme_classic() +
  scale_fill_gradient(low = "white", high = "black") +
  ylim(0.55, 0.8) +
  facet_wrap(~row)
## Joining with `by = join_by(unique_id)`
## Warning: Removed 48763 rows containing missing values (`geom_tile()`).
ggsave(
  "markdown_images/i3 neurons/RT-PCR unc13a - all three replicates plot.pdf",
  height = 20, width = 20, units = 'cm'
)
## Warning: Removed 48763 rows containing missing values (`geom_tile()`).
# Molar fraction of the lower band per lane.
# Each trace point is assigned to the lower or upper band by its corrected
# index; the signal is summed per (lane, band) and divided by the product
# length (405 for the lower band, 405 + 128 for the upper band) to turn the
# area into a molar quantity. Points outside both windows are dropped.
molar_ratios <- df2 %>%
  mutate(band = case_when(
    abs(corrected_index - 0.6) < 0.02 ~ "lower_band",
    abs(corrected_index - 0.72) < 0.035 ~ "upper_band",
    TRUE ~ "ignore"
  )) %>%
  filter(band != "ignore") %>%
  mutate(product_length = ifelse(band == "upper_band", 405 + 128, 405)) %>%
  group_by(unique_id, band) %>%
  mutate(integrated_area = sum(corrected_value)) %>%
  ungroup() %>%
  distinct(unique_id, band, integrated_area, product_length) %>%
  mutate(molar_value = integrated_area / product_length) %>%
  dplyr::select(unique_id, band, molar_value) %>%
  pivot_wider(names_from = band, values_from = molar_value) %>%
  mutate(molar_fraction_lower_band = lower_band / (lower_band + upper_band)) %>%
  inner_join(id_df) %>%
  # The result stays grouped by sample_name, with per-sample mean and sd
  # attached to every row.
  group_by(sample_name) %>%
  mutate(
    mean = mean(molar_fraction_lower_band),
    sd = sd(molar_fraction_lower_band)
  )
## Joining with `by = join_by(unique_id)`
# Quick look at every sample.
ggplot(molar_ratios, aes(x = sample_name, y = molar_fraction_lower_band)) +
  geom_point()

# Figure version: WT samples removed, PSI computed as 100 - 100 * (lower-band
# fraction), samples in a fixed order.
non_wt_ratios <- molar_ratios %>% filter(!str_detect(sample_name, "WT"))
ggplot(
  non_wt_ratios,
  aes(
    x = factor(sample_name, levels = c('Halo -protac', 'Halo +protac', 'mScarlet +protac', 'B5 +protac', 'B11 +protac')),
    y = 100-100*molar_fraction_lower_band
  )
) +
  geom_dotplot(binaxis = 'y', stackdir = 'center') +
  theme_classic() +
  ylab('UNC13A CE PSI')
## Bin width defaults to 1/30 of the range of the data. Pick better value with
## `binwidth`.
ggsave(
  "markdown_images/i3 neurons/RT-PCR unc13a.pdf",
  height = 4.5, width = 4, units = 'cm'
)
## Bin width defaults to 1/30 of the range of the data. Pick better value with
## `binwidth`.
# Read every spinal-cord BAM into one data frame, tagging each record with the
# path of the file it came from (`name`).
# Built in a single map_df() call rather than by growing `full_df` inside a
# loop: this avoids the repeated copying and the first-iteration special case,
# and means an empty glob can no longer leave an older `full_df` in place.
files <- Sys.glob(paste0(data_dir, "/raver_spinal_cord/*.bam"))
full_df <- map_df(files, function(bam_path) {
  data.frame(scanBam(bam_path)) %>%
    mutate(name = bam_path)
})
# Count reads per file and splicing class.
# Keeps files whose path contains "barcode0" (but not "barcode09"), reads
# mapped to the 'b11' reference with flag 0 or 16, and classifies each read by
# the intron lengths found in its CIGAR string.
full_df2 <- full_df %>%
  filter(
    str_detect(name, "barcode0"),
    !str_detect(name, "barcode09"),
    rname == 'b11',
    flag %in% c(0, 16)
  ) %>%
  mutate(ce = case_when(
    str_detect(cigar, "121N") & str_detect(cigar, "145N") ~ 'CE',
    str_detect(cigar, '357N') ~ 'no_CE',
    TRUE ~ 'unclear'
  )) %>%
  group_by(name, ce) %>%
  mutate(n = n()) %>%
  distinct(n) %>%
  # The barcode is the 2nd character after "barcode" in the path ("barcode01" -> 1).
  mutate(barcode = as.numeric(str_sub(word(name, 2, sep = 'barcode'), 2, 2)))
# Percentage of reads containing the cryptic exon, per barcode.
# Odd barcodes are labelled cKO and even ones WT; `m` is the per-condition mean.
# FIX: the CE count is now sum(n[ce == 'CE']) instead of
# max(ifelse(ce == 'CE', n, -1)). The old form returned -1 (a negative
# percentage) for a barcode with no CE reads; it is now 0. The two forms give
# the same number whenever a barcode has exactly one 'CE' row.
full_df3 <- full_df2 %>%
  ungroup() %>%
  group_by(barcode) %>%
  mutate(pc_ce = 100 * sum(n[ce == 'CE']) / sum(n)) %>%
  distinct(pc_ce, barcode) %>%
  mutate(condition = factor(
    ifelse(barcode %% 2 == 1, 'cKO', 'WT'),
    levels = c('WT', 'cKO')
  )) %>%
  ungroup() %>%
  group_by(condition) %>%
  mutate(m = mean(pc_ce))
# Dot plot of CE percentage per barcode, split by condition.
# The second ggtitle("") call replaces the descriptive title set just before
# it, so the saved figure has an empty title.
spinal_cord_bam_plot <- ggplot(
  full_df3,
  aes(x = condition, y = pc_ce, fill = condition)
) +
  geom_dotplot(
    binaxis = 'y', stackdir = 'center',
    binwidth = 0.2, dotsize = 5, alpha = 0.8
  ) +
  ggtitle('Nanopore analysis of B11 in spinal cord') +
  theme_classic() +
  ggsci::scale_fill_npg() +
  ggeasy::easy_remove_legend() +
  ggtitle("") +
  ylab('Cryptic #9 PSI \n(mouse spinal cord)')
spinal_cord_bam_plot
ggsave(
  "markdown_images/raver_spinal_cord/nanopore_results.pdf",
  plot = spinal_cord_bam_plot, height = 5, width = 4, units = 'cm'
)
# Now using extraction of junctions...
# Each *.csv.gz holds junction strings with read counts. The barcode is the 2nd
# character of the file name; odd barcodes are cKO and even ones WT. Rows whose
# flag_string mentions 'alignment' are dropped.
files <- Sys.glob(paste0(data_dir, "/raver_spinal_cord/*.csv.gz"))
all_df <- map_df(files, function(file) {
  read_csv(file) %>%
    mutate(filename = word(file, -1, sep = '/'))
}) %>%
  mutate(barcode = as.numeric(str_sub(filename, 2, 2))) %>%
  filter(!str_detect(flag_string, 'alignment')) %>%
  mutate(condition = factor(
    ifelse(barcode %% 2 == 1, 'cKO', 'WT'),
    levels = c('WT', 'cKO')
  )) %>%
  mutate(isoform = case_when(
    junctions == '730-1088' ~ 'no_CE',
    junctions == '730-852;942-1088' ~ 'with_CE',
    TRUE ~ 'other'
  )) %>%
  group_by(barcode, isoform) %>%
  mutate(n_this_isoform = sum(number_of_reads)) %>%
  ungroup() %>%
  group_by(barcode) %>%
  mutate(frac_this_isoform = n_this_isoform / sum(number_of_reads)) %>%
  distinct(barcode, condition, n_this_isoform, frac_this_isoform, isoform) %>%
  # FIX: the with_CE count was max(ifelse(isoform == 'with_CE', n, -1)), which
  # gave a negative fraction for a barcode with no with_CE reads. Summing the
  # matching rows gives 0 in that case and the same value otherwise (there is
  # one row per isoform per barcode at this point).
  mutate(frac_ce = sum(n_this_isoform[isoform == 'with_CE']) / sum(n_this_isoform))
## Rows: 3461 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 3419 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 2629 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 3117 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 2874 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 3798 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 2251 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 3444 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): reference, flag_string, strand, junctions
## dbl (5): mapping_quality, flag, first_pos, last_pos, number_of_reads
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Same dot plot as for the BAM-based analysis, using the junction-based CE
# fraction (one point per barcode). The second ggtitle("") call replaces the
# descriptive title, leaving the saved figure with an empty title.
junction_plot_data <- all_df %>% distinct(barcode, condition, frac_ce)
spinal_cord_junction_plot <- ggplot(
  junction_plot_data,
  aes(x = condition, y = 100*frac_ce, fill = condition)
) +
  geom_dotplot(
    binaxis = 'y', stackdir = 'center',
    binwidth = 0.2, dotsize = 5, alpha = 0.8
  ) +
  ggtitle('Nanopore analysis of B11 in spinal cord') +
  theme_classic() +
  ggsci::scale_fill_npg() +
  ggeasy::easy_remove_legend() +
  ggtitle("") +
  ylab('Cryptic #9 PSI \n(mouse spinal cord)')
spinal_cord_junction_plot
ggsave(
  "markdown_images/raver_spinal_cord/nanopore_results2.pdf",
  plot = spinal_cord_junction_plot, height = 5, width = 4, units = 'cm'
)
# Western blot band quantifications; the 'background' lanes are removed.
western_csv <- paste0(data_dir, '/STMN2 western blot quantification/SKNBE2 western blot quantifications with STMN2 - Sheet1 (1).csv')
df <- read_csv(western_csv) %>%
  filter(lane != 'background') # background was zero in all cases due to quantification method used
## Rows: 83 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): lane, target, blot
## dbl (1): value
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# STMN2 remaining after TDP-43 knockdown, per construct.
# Lanes come in (NT, shTDP) pairs per construct. STMN is normalised to tubulin
# within each lane, the shTDP lane is expressed as a fraction of its NT
# partner, and blots whose name contains 'box2' are pooled into one replicate
# ('first') before averaging.
df2 <- df %>%
  mutate(
    line = factor(
      case_when(
        lane %in% 1:2 ~ 'Constitutive',
        lane %in% 3:4 ~ '#6',
        lane %in% 5:6 ~ '#9',
        lane %in% 7:8 ~ 'mSc'
      ),
      levels = c('Constitutive', '#6', '#9', 'mSc')
    ),
    condition = if_else(lane %in% c(1, 3, 5, 7), 'NT', 'shTDP')
  ) %>%
  group_by(blot, target, line) %>%
  dplyr::select(-lane) %>%
  pivot_wider(names_from = target, values_from = value) %>%
  mutate(normalised_STMN = STMN / tubulin) %>%
  dplyr::select(-STMN, -tubulin) %>%
  pivot_wider(names_from = condition, values_from = normalised_STMN) %>%
  mutate(
    fraction_of_NT = shTDP / NT,
    polyclonal_replicate = case_when(
      str_detect(blot, 'box2') ~ 'first',
      TRUE ~ blot
    )
  ) %>%
  ungroup() %>%
  group_by(polyclonal_replicate, line) %>%
  mutate(mean_fraction_of_NT = mean(fraction_of_NT))

# One row per (replicate, construct) for plotting.
df3 <- df2 %>%
  ungroup() %>%
  distinct(polyclonal_replicate, line, mean_fraction_of_NT)
# Percentage of STMN2 remaining per construct, one point per replicate.
# The later xlab('Construct') replaces the earlier xlab('Vector').
stmn2_plot <- ggplot(df3, aes(x = line, y = 100*mean_fraction_of_NT)) +
  geom_point(alpha = 0.5) +
  ylab('% STMN2 remaining') +
  xlab('Vector') +
  #ggtitle('Quantification of STMN2 from western blots') +
  xlab('Construct') +
  theme_classic()
stmn2_plot
ggsave(
  "markdown_images/STMN2 western blots/quantification.pdf",
  plot = stmn2_plot, height = 6.5, width = 7, units = 'cm'
)
# Backbone fragments keyed by FASTA record name (`seq_name`); the sequences
# that flank each insert are looked up here when full vectors are assembled.
backbone_set <- Biostrings::readDNAStringSet('small_data_files/vector_sequences/backbones.fa')
backbones <- rownames_to_column(data.frame(seq = backbone_set), 'seq_name')
# TDP-REGv1 mCherry (pTwist)
# r3 is the first record of the reporter FASTA as a plain string.
# r3_pos is the same sequence with two literal sub-sequences deleted (first
# occurrence of each, via str_replace with an empty replacement); it is used
# below as the "positive control" version of the reporter.
# NOTE(review): the two removed stretches are presumably the introns flanking
# the cryptic exon -- confirm against the construct map.
r3 <- paste(readDNAStringSet('small_data_files/vector_sequences/mCherry/for-patent-aars1-rsp24-in-ptwist-cmv (1).fasta')[1])
r3_pos <- str_replace(str_replace(r3, 'GTAAGAATGCACATCACTTCTTGAGAGTATGGAGGAGTGAAATGACACTCAGTGCCAGAGTTACTGTATATCTACACTTTAAAAGTGTAGCTTTTAAAAGATAAGCAAGCACAATCTTTTGTGTGTGTGTGTGTGAATGTGTGTGTGTGTGTGTGTCACCCAG', ''),
'GTATGCATCACCCCCCCAGCTAATTTTTTTTTGTATTTTTTACCGAGTCGGGGTTTCGCAATGTTGCCCAGGCTGGTCTCAGAGTCTCGCTCTGTTGTCTACGCTGGAGTGCAGTAACATGAGCCACTGTGCCCGGCCAATCCTAAGAATTTCTTTTGCGGTGGTTGCAAGTCTGGGCAGAACTCTTGTCAGGGGCTGTAACTGGACTTATCTTTACTCCTTTGTCAG', '')
# The two mCherry reporter vectors start the master vector table.
mcherry_names <- c(
  'TDP-REGv1 mCherry reporter',
  'TDP-REGv1 mCherry reporter positive control'
)
mcherry_df <- data.frame(
  vector_name = mcherry_names,
  sequence = c(r3, r3_pos)
)
all_vectors <- mcherry_df
# TDP-REGv2 mScarlet vectors (pTwist)
# Plate layout of the 12_05_2022 Incucyte run, with a running row index added.
positions <- "small_data_files/Plate positions for 12_05_2022 incucyte.csv" %>%
  read_csv() %>%
  mutate(row = seq_len(n()))
## Rows: 20 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): Construct, Position, Type
## dbl (2): Plate, Order
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# mScarlet inserts keyed by construct name.
mscar_fa <- data.frame(
  insert = Biostrings::readDNAStringSet('small_data_files/vector_sequences/TDP-REGv2 mScarlet vectors.fa')
) %>%
  rownames_to_column('Construct')

# Backbone sequences that flank every mScarlet insert.
mscar_upstream <- backbones$seq[which(backbones$seq_name == 'mscar_upstream')]
mscar_downstream <- backbones$seq[which(backbones$seq_name == 'mscar_downstream')]

# The positive control is appended as an extra row with no plate position,
# which leaves its Order as NA after bind_rows().
mscar_positive_control <- mscar_fa %>%
  filter(Construct == 'positive_control') %>%
  dplyr::select(insert) %>%
  mutate(vector_name = 'mScarlet positive control')

# Constructs with a plate position are named by their Order; rows without one
# are named as the positive control.
mscar_df <- mscar_fa %>%
  inner_join(positions) %>%
  bind_rows(mscar_positive_control) %>%
  mutate(
    vector_name = if_else(
      is.na(Order),
      'TDP-REGv2 mScarlet positive control in pTwist backbone',
      paste0('TDP-REGv2 mScarlet #', Order, ' in pTwist backbone')
    ),
    sequence = paste0(mscar_upstream, insert, mscar_downstream)
  ) %>%
  dplyr::select(vector_name, sequence)
## Joining with `by = join_by(Construct)`
all_vectors <- all_vectors %>% bind_rows(mscar_df)
# TDP-REGv2 mScarlet vectors (pAAV)
all_mscar_AAVs <- Sys.glob("small_data_files/vector_sequences/fluorescent AAVs/*")
# Order number of construct A11, used to name the matching AAV.
a11_order <- positions$Order[which(positions$Construct == 'A11')]
# One row per AAV file (first FASTA record only); files whose path contains
# 'b12' are skipped and the name is derived from the path.
mscar_df_AAV <- map_df(all_mscar_AAVs, function(file) {
  data.frame(
    filename = file,
    sequence = paste(readDNAStringSet(file)[1])
  ) %>%
    filter(!str_detect(filename, 'b12')) %>%
    mutate(
      insert_label = case_when(
        str_detect(filename, 'a11') ~ paste0('TDP-REGv2 mScarlet #', a11_order),
        str_detect(filename, 'f1') ~ 'TDP-REGv1 mCherry',
        str_detect(filename, 'control') ~ 'mScarlet positive control'
      ),
      vector_name = paste0('pAAV with ', insert_label)
    ) %>%
    dplyr::select(vector_name, sequence)
})
# Every fluorescent-reporter vector collected so far gets the same comment.
all_vectors <- all_vectors %>%
  bind_rows(mscar_df_AAV) %>%
  mutate(comments = 'Used for studying TDP-43-mediated regulation of fluorescent protein expression')
# TDP/RAVER vectors (pTwist CMV)
# The 2FL variant is derived from the WT sequence by swapping one 19-nt stretch
# (first occurrence) for its mutated counterpart.
raver_ptwist_WT <- paste0(
  Biostrings::readDNAStringSet('small_data_files/vector_sequences/RAVER/tdp-43-raver1-only-with-mutations-to-remove-acceptors-in-raver.fasta')[1]
)
RV_WT_seq <- 'ATAGCAAAGGGTTCGGATT'
RV_2FL_seq <- 'ATAGCAAAGGGCTCGGACT'
raver_ptwist_2FL <- str_replace(
  string = raver_ptwist_WT,
  pattern = RV_WT_seq,
  replacement = RV_2FL_seq
)
raver_df <- data.frame(
  vector_name = c(
    'Constitutive TDP-43/Raver1 fusion in pTwist CMV',
    'Constitutive TDP-43/Raver1 fusion with 2FL mutation in pTwist CMV'
  ),
  sequence = c(raver_ptwist_WT, raver_ptwist_2FL),
  comments = 'Used for showing rescue of mScarlet reporters when co-transfected'
)
# Plasmid names in construct-number order (1-10). `code` is the well-style ID
# with the zero padding removed ("Plasmid B03" -> "B3").
ordered_names <- data.frame(
  plasmid = c(
    "Plasmid B03", "Plasmid B02",
    "Plasmid B04", "Plasmid B07",
    "Plasmid B08", "Plasmid B05",
    "Plasmid B06", "Plasmid B09",
    "Plasmid B11", "Plasmid B10"
  ),
  construct_number = factor(1:10)
) %>%
  mutate(code = paste0(
    str_sub(word(plasmid, 2), 1, 1),
    as.numeric(str_sub(word(plasmid, 2), 2, 3))
  ))
# Rebuild the full vector for each ordered eBlock: locate where the eBlock's
# first and last 10 nt sit in the WT backbone, then splice the eBlock into the
# 2FL backbone between those two positions.
raver_eblock_df <- "small_data_files/vector_sequences/RAVER/August 2022 Eblock order - Sheet1.csv" %>%
  read_csv() %>%
  dplyr::rename(code = `Well Position`) %>%
  inner_join(ordered_names) %>%
  mutate(
    construct_name = paste0(
      'TDP-REGv2 TDP-43/Raver1 #', construct_number,
      ' with 2FL mutation in pTwist CMV vector'
    ),
    # Last backbone base kept upstream / first backbone base kept downstream.
    backbone_trim_upstream =
      str_locate(raver_ptwist_WT, str_sub(Sequence, 1, 10))[, 1] - 1,
    backbone_trim_downstream =
      str_locate(raver_ptwist_WT, str_sub(Sequence, -10, -1))[, 2] + 1,
    sequence = paste0(
      str_sub(raver_ptwist_2FL, 1, backbone_trim_upstream),
      Sequence,
      str_sub(raver_ptwist_2FL, backbone_trim_downstream, -1)
    )
  ) %>%
  dplyr::select(vector_name = construct_name, sequence) %>%
  mutate(comments = 'Used for initial test of TDP-43-encoding CEs by RT-PCR')
## Rows: 96 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): Well Position, Name, Sequence
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Joining with `by = join_by(code)`
# Append the eBlock-derived vectors to the RAVER vector table.
raver_df <- raver_df %>% bind_rows(raver_eblock_df)
# TDP/RAVER vectors (Piggybac for i3)
pig_up <- backbones$seq[which(backbones$seq_name == 'piggybac_upstream')]
pig_down <- backbones$seq[which(backbones$seq_name == 'piggybac_downstream')]

# The mScarlet positive control is moved into the Piggybac backbone alongside
# the RAVER constructs.
piggybac_mscarlet <- data.frame(
  vector_name = 'mScarlet',
  sequence = mscar_df$sequence[which(str_detect(mscar_df$vector_name, 'positive control'))]
)

raver_ef1a_piggybacs <- raver_df %>%
  filter(
    vector_name == 'Constitutive TDP-43/Raver1 fusion in pTwist CMV' |
      str_detect(vector_name, "#6") |
      str_detect(vector_name, '#9')
  ) %>%
  bind_rows(piggybac_mscarlet) %>%
  mutate(
    # The insert is whatever lies between the last 12 nt of the upstream
    # backbone and the first 10 nt of the downstream backbone.
    trim_insert_start = str_locate(sequence, str_sub(pig_up, -12, -1))[, 2] + 1,
    trim_insert_end = str_locate(sequence, str_sub(pig_down, 1, 10))[, 1] - 1,
    sequence = paste0(
      pig_up,
      str_sub(sequence, trim_insert_start, trim_insert_end),
      pig_down
    ),
    # Any 2FL stretch is swapped back to the WT sequence.
    sequence = str_replace(sequence, RV_2FL_seq, RV_WT_seq),
    vector_name = paste0(
      case_when(
        str_detect(vector_name, 'Constitutive') ~ 'Constitutive TDP-43/Raver1 fusion in ',
        str_detect(vector_name, '#6') ~ 'TDP-REGv2 TDP-43/Raver1 #6 in ',
        str_detect(vector_name, '#9') ~ 'TDP-REGv2 TDP-43/Raver1 #9 in ',
        str_detect(vector_name, 'mScarlet') ~ 'mScarlet positive control in '
      ),
      'Piggybac vector with EF1A promoter'
    )
  ) %>%
  dplyr::select(vector_name, sequence) %>%
  mutate(
    comments = 'Used for testing rescue of cryptics in SK-N-BE2 cells.',
    # Vectors whose name contains 'onstitutive' keep the base comment; the
    # others get the i3 Neuron sentence appended.
    comments = ifelse(
      str_detect(vector_name, 'onstitutive'),
      comments,
      paste0(comments, ' Also used for generating i3 Neuron polyclonal lines.')
    )
  )
# Also need to add piggybac mScarlet
raver_df <- raver_df %>% bind_rows(raver_ef1a_piggybacs)
# TET-inducible TDP/RAVER/BFP vectors
dream_files <- Sys.glob('small_data_files/vector_sequences/RAVER/Dream 3/*.fasta')
# Construct numbers for the B5 and B11 plasmids, used in the vector names.
dream_b5_number <- ordered_names$construct_number[which(ordered_names$code == 'B5')]
dream_b11_number <- ordered_names$construct_number[which(ordered_names$code == 'B11')]
# One row per FASTA file (first record only), named from the file path.
dream_vectors_df <- map_df(dream_files, function(fasta_path) {
  data.frame(
    filename = fasta_path,
    sequence = paste(readDNAStringSet(fasta_path)[1])
  ) %>%
    mutate(
      vector_name = case_when(
        str_detect(filename, 'bfp') ~ 'TET-inducible BFP vector',
        str_detect(filename, 'b5') ~ paste0('TET-inducible TDP-REGv2 TDP/Raver1 #', dream_b5_number),
        str_detect(filename, 'b11') ~ paste0('TET-inducible TDP-REGv2 TDP/Raver1 #', dream_b11_number),
        str_detect(filename, 'rv') ~ paste0('TET-inducible Constitutive TDP/Raver1')
      ),
      vector_name = paste0(vector_name, ' with mScarlet TET3G and BlasticidinR')
    ) %>%
    dplyr::select(vector_name, sequence) %>%
    mutate(comments = 'Used for growth competition assay')
})
raver_df <- raver_df %>% bind_rows(dream_vectors_df)
# TDP/RAVER vector (pAAV)
# The row names carried over from the FASTA record names are moved into a
# throwaway column and dropped.
raver_aav_set <- Biostrings::readDNAStringSet('small_data_files/vector_sequences/RAVER/paav-raver-b11 (1).fasta')
raver_AAV <- data.frame(sequence = raver_aav_set) %>%
  mutate(
    vector_name = paste0(
      'pAAV TDP-REGv2 TDP/Raver1 #',
      ordered_names$construct_number[which(ordered_names$code == 'B11')]
    ),
    comments = 'Used for testing in mouse spinal cord'
  ) %>%
  rownames_to_column('ignore') %>%
  dplyr::select(-ignore)
# Fold the AAV into the RAVER table, then the RAVER table into the master table.
raver_df <- raver_df %>% bind_rows(raver_AAV)
all_vectors <- all_vectors %>% bind_rows(raver_df)
# 12QN vectors
QN_files <- Sys.glob('small_data_files/vector_sequences/12QN/*.fasta')
# One row per FASTA file (first record only); the name is chosen from the path.
QN_vectors_df <- map_df(QN_files, function(fasta_path) {
  data.frame(
    filename = fasta_path,
    sequence = paste(readDNAStringSet(fasta_path)[1])
  ) %>%
    mutate(vector_name = case_when(
      str_detect(filename, 'just') ~ 'SNAP-only control in pcDNA3.1',
      str_detect(filename, '12qn') ~ '12QN TDP-43 mutant in pcDNA3.1',
      str_detect(filename, 'wt') ~ 'WT TDP-43 in pcDNA3.1'
    )) %>%
    dplyr::select(vector_name, sequence) %>%
    mutate(comments = 'Used for studying activation by TDP-43 aggregation')
})
all_vectors <- all_vectors %>% bind_rows(QN_vectors_df)
# Cre with 1, 2 or 3 CEs
cre_files <- Sys.glob('small_data_files/vector_sequences/Cre/*.fasta')
# One row per FASTA file (first record only); the name is chosen from the path.
cre_vectors_df <- map_df(cre_files, function(fasta_path) {
  data.frame(
    filename = fasta_path,
    sequence = paste(readDNAStringSet(fasta_path)[1])
  ) %>%
    mutate(vector_name = case_when(
      str_detect(filename, 'triple') ~ 'Cryptic Cre (3 CE) in pTwist CMV',
      str_detect(filename, 'double') ~ 'Cryptic Cre (2 CEs) in pTwist CMV',
      str_detect(filename, 'without') ~ 'Cryptic Cre (1 CEs) in pTwist CMV'
    )) %>%
    dplyr::select(vector_name, sequence) %>%
    mutate(comments = 'Used for testing the value of adding extra CEs to a sequence')
})
all_vectors <- all_vectors %>% bind_rows(cre_vectors_df)
# Prime editing
pe_files <- Sys.glob('small_data_files/vector_sequences/prime editing/*.fasta')
# The vector name is the file's base name up to the first '.', with the
# '-code-12c' suffix removed; the comment depends on whether that name
# contains 'flag'.
pe_vectors_df <- map_df(pe_files, function(fasta_path) {
  data.frame(
    filename = fasta_path,
    sequence = paste(readDNAStringSet(fasta_path)[1])
  ) %>%
    mutate(
      vector_name = word(word(filename, 1, sep = '\\.'), -1, sep = '/'),
      vector_name = str_replace(vector_name, '-code-12c', ''),
      comments = ifelse(
        str_detect(vector_name, 'flag'),
        'Used for western blotting prime editing vectors',
        'Used for studying if prime editing can be controlled by TDP-43 function'
      )
    ) %>%
    dplyr::select(vector_name, sequence, comments)
})
all_vectors <- all_vectors %>% bind_rows(pe_vectors_df)
# Luciferase
# Lookup table: barcode number -> FASTA record name -> label used in the paper.
gluc_names_df <- data.frame(
  r_bc = 1:7,
  expected_rname = c(
    "design1_Gluc",
    "Gluc_prepared",
    "Design2_Gluc_A2",
    "Design2_Gluc_A3",
    "Design2_Gluc_A5",
    "Design2_Gluc_A6",
    "Design2_Gluc_A7"
  ),
  paper_name = c(
    "TDP-REGv1",
    "+ve",
    "TDP-REGv2\n#1",
    "TDP-REGv2\n#2",
    "TDP-REGv2\n#3",
    "TDP-REGv2\n#4",
    "TDP-REGv2\n#5"
  )
)
# Assemble the luciferase vectors: each insert is wrapped in the gluc
# upstream / downstream backbone and named from its paper label (labels
# containing 've' become the positive control).
gluc_upstream <- backbones$seq[which(backbones$seq_name == 'gluc_upstream')]
gluc_downstream <- backbones$seq[which(backbones$seq_name == 'gluc_downstream')]
gluc_insert_set <- Biostrings::readDNAStringSet('small_data_files/vector_sequences/Gluc/gluc_fasta.fa')
gluc_fasta <- data.frame(insert = gluc_insert_set) %>%
  rownames_to_column('expected_rname') %>%
  inner_join(gluc_names_df) %>%
  mutate(
    vector_name = paste0(
      case_when(
        str_detect(paper_name, 've') ~ 'GLUC luciferase positive control',
        TRUE ~ paste0('GLUC ', paper_name)
      ),
      ' in pTwist-CMV'
    ),
    sequence = paste0(gluc_upstream, insert, gluc_downstream)
  ) %>%
  dplyr::select(vector_name, sequence) %>%
  mutate(comments = 'Used for studying TDP-43-mediated regulation of luciferase expression')
## Joining with `by = join_by(expected_rname)`
all_vectors <- all_vectors %>% bind_rows(gluc_fasta)
# NEGATIVE CONTROL mCherry HIS FLAG
# Single-row table built from the first record of the FASTA file.
his_sequence <- paste(readDNAStringSet('small_data_files/vector_sequences/mCherry/cm1-triflag-7xhis.fasta')[1])
his_df <- data.frame(sequence = his_sequence) %>%
  mutate(
    vector_name = 'mCherry negative control with HIS-tag and TriFlag-tag',
    comments = 'Used for assessing leaky expression by western blotting'
  ) %>%
  rownames_to_column('ignore') %>%
  dplyr::select(-ignore)
all_vectors <- all_vectors %>% bind_rows(his_df)
# shRNA vectors
# For each RBP, the SplashRNA predictions and the "top20" predictions are
# joined on the guide sequence, and the two scores are combined as
# 1.488136 * top20_score + SplashRNA.
rbps <- c("tdp43", "FUS", "hnRNPA1", "hnRNPK", "hnRNPC")
filenames <- Sys.glob(paste0(data_dir, "/sami/*.csv"))
full_df <- map_df(rbps, function(rbp) {
  is_this_rbp <- str_detect(filenames, rbp)
  splash_filename <- filenames[which(is_this_rbp & str_detect(filenames, "splash"))]
  top20_filename <- filenames[which(is_this_rbp & str_detect(filenames, "top20"))]

  # The SplashRNA export has three lines before its header row.
  splash_df <- read_csv(splash_filename, skip = 3) %>%
    dplyr::rename(seq = Antisense.Guide.Sequence)
  # The top20 score is read as text; its last character is stripped before
  # converting to a number.
  top20_df <- read_csv(top20_filename) %>%
    mutate(fixed_score = as.numeric(str_sub(score, 1, -2))) %>%
    dplyr::select(seq = shRNAsequence, top20_score = fixed_score)

  inner_join(splash_df, top20_df, by = "seq") %>%
    mutate(
      combined_score = 1.488136*top20_score + SplashRNA,
      rbp = rbp
    )
})
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
## dat <- vroom(...)
## problems(dat)
## Rows: 20 Columns: 11
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (6): Feature, ID, shRNA.name, Antisense.Guide.Sequence, 97mer.construct,...
## dbl (3): SplashRNA, Mouse.22mer.match.genes, Human.22mer.match.genes
## lgl (2): Warnings, Mouse.match.entrezIDs
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 20 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): position_mRNA, shRNAsequence, targetsequence, score
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
## dat <- vroom(...)
## problems(dat)
## Rows: 20 Columns: 11
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (6): Feature, ID, shRNA.name, Antisense.Guide.Sequence, 97mer.construct,...
## dbl (3): SplashRNA, Mouse.22mer.match.genes, Human.22mer.match.genes
## lgl (2): Warnings, Mouse.match.entrezIDs
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 20 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): position_mRNA, shRNAsequence, targetsequence, score
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
## dat <- vroom(...)
## problems(dat)
## Rows: 20 Columns: 11
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (7): Feature, ID, shRNA.name, Antisense.Guide.Sequence, 97mer.construct,...
## dbl (3): SplashRNA, Mouse.22mer.match.genes, Human.22mer.match.genes
## lgl (1): Warnings
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 20 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): position_mRNA, shRNAsequence, targetsequence, score
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
## dat <- vroom(...)
## problems(dat)
## Rows: 20 Columns: 11
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (6): Feature, ID, shRNA.name, Antisense.Guide.Sequence, 97mer.construct,...
## dbl (3): SplashRNA, Mouse.22mer.match.genes, Human.22mer.match.genes
## lgl (2): Warnings, Mouse.match.entrezIDs
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 20 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): position_mRNA, shRNAsequence, targetsequence, score
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
## dat <- vroom(...)
## problems(dat)
## Rows: 20 Columns: 11
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (7): Feature, ID, shRNA.name, Antisense.Guide.Sequence, 97mer.construct,...
## dbl (3): SplashRNA, Mouse.22mer.match.genes, Human.22mer.match.genes
## lgl (1): Warnings
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 20 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): position_mRNA, shRNAsequence, targetsequence, score
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
### Get best shRNA sequences
# Keep the top-scoring shRNAs for every RBP, ranked by combined_score.
# slice_max() retains ties by default, so an RBP can contribute more than
# three rows; the result is still grouped by rbp.
best_df <- slice_max(group_by(full_df, rbp), combined_score, n = 3)

# One panel per RBP: SplashRNA against top20_score, with point size showing
# combined_score and colour flagging the sequences that were kept in best_df.
full_df %>%
  ggplot(
    aes(
      x = SplashRNA,
      y = top20_score,
      size = combined_score,
      colour = seq %in% best_df$seq
    )
  ) +
  geom_point() +
  facet_wrap(~rbp)
# Derive the cloning oligos for each selected 97mer construct.
best_df2 <- best_df %>%
  mutate(
    # The 97mer with the first occurrence of each fixed flanking sequence
    # removed.
    just_insert = `97mer.construct` %>%
      str_remove("TGCTGTTGACAGTGAGCG") %>%
      str_remove("TGCCTACTGCCTCGGA"),
    # The 97mer padded with "at" in front and "ct" behind.
    # NOTE(review): presumably padding for Gibson assembly - confirm.
    for_gib = paste0("at", `97mer.construct`, "ct"),
    # Forward oligo: the last 60 characters of the padded construct.
    forward_oligo = str_sub(for_gib, -60, -1),
    # Reverse oligo: rc() of the first 60 characters. rc() is defined
    # elsewhere in this file (presumably reverse-complement - confirm).
    reverse_oligo = rc(str_sub(for_gib, 1, 60))
  )
# Assemble the full shRNA plasmid sequences for the vector table.
#
# Every selected 97mer is numbered within its RBP, named "sh_<rbp>_<i>", and
# placed between the 'sami_upstream' and 'sami_downstream' backbone
# sequences. The negative-control shRNA plasmid is read from its FASTA file
# and appended as an extra row; all rows share the same comment.
#
# The previous version pivoted the forward/reverse oligo columns to long
# format, built "..._F"/"..._R" names, stripped that suffix again and then
# de-duplicated. That round trip only ever produced one "sh_<rbp>_<i>" row
# per construct (ordered by gib_name), so the name is now built directly.

# First record of the negative-control FASTA as a plain character string.
# This used to be assigned inside the data.frame() call; the name `this_seq`
# is kept in case other chunks still read it.
this_seq <- paste(readDNAStringSet('small_data_files/vector_sequences/shRNA/piggybac-blasticidin-with-mgl-and-shrna-and-xmai-cut-site.fasta')[1])

shRNA_constructs <- best_df2 %>%
  # group_by() replaces whatever grouping best_df2 carries.
  group_by(rbp) %>%
  mutate(gib_name = paste0(rbp, "_", seq_len(n()))) %>%
  ungroup() %>%
  arrange(gib_name) %>%
  mutate(vector_name = paste0("sh_", gib_name)) %>%
  dplyr::select(vector_name, construct = `97mer.construct`) %>%
  distinct() %>%
  mutate(sequence = paste0(backbones$seq[which(backbones$seq_name == 'sami_upstream')],
                           construct,
                           backbones$seq[which(backbones$seq_name == 'sami_downstream')])) %>%
  dplyr::select(vector_name, sequence) %>%
  bind_rows(data.frame(vector_name = 'Negative control shRNA plasmid',
                       sequence = this_seq)) %>%
  mutate(comments = 'Used to test specificity of TDP-REG vectors to TDP-43 knockdown')

all_vectors <- bind_rows(all_vectors, shRNA_constructs)
# BSBS vectors
#
# Build the full vector sequence for every insert in the BSBS reference FASTA
# and append the result to all_vectors, which is then written to CSV.
#
# The FASTA record names become `original_vector_name`; the 2nd and 3rd
# "_"-separated fields of that name are used as the attempt and optimisation
# level in the human-readable vector name.
#
# Each insert is spliced into the 'gluc_upstream' / 'gluc_downstream' backbone
# sequences: the upstream sequence is cut just before the position where the
# insert's first 10 nt occur in it, and the downstream sequence is kept from
# just after the position where the insert's last 10 nt occur in it.
#
# If either 10-nt anchor is not found, str_locate() returns NA and paste0()
# would silently write the literal text "NA" into the sequence, so that case
# now stops with the names of the offending records.
#
# local() keeps the helper variables out of the global environment.
bsbs <- local({
  gluc_upstream <- backbones$seq[which(backbones$seq_name == "gluc_upstream")]
  gluc_downstream <- backbones$seq[which(backbones$seq_name == "gluc_downstream")]

  located <- data.frame(insert = Biostrings::readDNAStringSet('small_data_files/vector_sequences/bsbs/full_reference_fasta.fa')) %>%
    rownames_to_column('original_vector_name') %>%
    mutate(
      attempt = word(original_vector_name, 2, sep = '_'),
      optimisation_level = word(original_vector_name, 3, sep = "_"),
      vector_name = paste0("SpliceNouveau test attempt ", attempt,
                           " with optimisation level ", optimisation_level),
      # Last upstream position to keep: one before the start of the match.
      trim_gluc_up = str_locate(gluc_upstream, str_sub(insert, 1, 10))[, 1] - 1,
      # First downstream position to keep: one after the end of the match.
      trim_gluc_down = str_locate(gluc_downstream, str_sub(insert, -10, -1))[, 2] + 1
    )

  unanchored <- located$original_vector_name[
    is.na(located$trim_gluc_up) | is.na(located$trim_gluc_down)
  ]
  if (length(unanchored) > 0) {
    stop(
      "Insert ends not found in the gluc backbone for: ",
      paste(unanchored, collapse = ", "),
      call. = FALSE
    )
  }

  located %>%
    mutate(sequence = paste0(str_sub(gluc_upstream, 1, trim_gluc_up),
                             insert,
                             str_sub(gluc_downstream, trim_gluc_down, -1))) %>%
    dplyr::select(vector_name, sequence) %>%
    mutate(comments = 'Used for testing SpliceNouveau optimisation versus performance')
})

all_vectors <- bind_rows(all_vectors, bsbs)
write_csv(all_vectors, 'All_vectors_used_in_study.csv')